tcl/Fix-behavior-of-Tcl_GetRange-and-string-range-regard.patch

69 lines
2.4 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 4199a703dccda415778cc3431696a6bed57ab15c Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 11 Jan 2018 15:44:05 +0000
Subject: [PATCH 0834/1800] Fix behavior of Tcl_GetRange() and "string range"
regarding surrogates, when Tcl is compiled with -DTCL_UTF_MAX=4. Partial fix
for bug [11ae2be95dac9417]. Also, fix typo.
---
generic/tclStringObj.c | 13 ++++++++++++-
tests/string.test | 4 ++++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 0f238cfb1..1b35c567b 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -533,7 +533,7 @@ Tcl_GetUniChar(
*
* Get the Unicode form of the String object. If the object is not
* already a String object, it will be converted to one. If the String
- * object does not have a Unicode rep, then one is create from the UTF
+ * object does not have a Unicode rep, then one is created from the UTF
* string format.
*
* Results:
@@ -667,6 +667,17 @@ Tcl_GetRange(
stringPtr = GET_STRING(objPtr);
}
+#if TCL_UTF_MAX == 4
+ /* See: bug [11ae2be95dac9417] */
+ if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
+ ++first;
+ }
+ if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) {
+ ++last;
+ }
+#endif
return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
}
diff --git a/tests/string.test b/tests/string.test
index bbba5ebec..53f1cfb73 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -24,6 +24,7 @@ catch [list package require -exact Tcltest [info patchlevel]]
testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
+testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]
# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]
@@ -1276,6 +1277,9 @@ test string-12.22 {string range, shimmering binary/index} {
binary scan $s a* x
string range $s $s end
} 000000001
+test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} fullutf {
+ list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]
+} [list \U100000 {} b]
test string-13.1 {string repeat} {
list [catch {string repeat} msg] $msg
--
2.19.1