tcl/Bug-fix-in-Tcl_UtfAtIndex-for-TCL_UTF_MAX-4-only-.-W.patch

81 lines
2.8 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From f2343ead74b78173ed8b13543107a689c408e908 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Mon, 23 Apr 2018 23:23:00 +0000
Subject: [PATCH 1121/1800] Bug-fix in Tcl_UtfAtIndex (for TCL_UTF_MAX=4 only).
With test-case (in "string totitle") demonstrating the bug.
---
generic/tclUtf.c | 8 ++++++++
tests/string.test | 11 +++++++++--
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 0d88d36b3..c08464b9d 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -762,10 +762,18 @@ Tcl_UtfAtIndex(
register int index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
+ int len = 1;
while (index-- > 0) {
+ len = TclUtfToUniChar(src, &ch);
+ src += len;
+ }
+#if TCL_UTF_MAX == 4
+ if (!len) {
+ /* Index points at character following High Surrogate */
src += TclUtfToUniChar(src, &ch);
}
+#endif
return src;
}
diff --git a/tests/string.test b/tests/string.test
index d69fda44d..868fc25fc 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -24,7 +24,7 @@ catch [list package require -exact Tcltest [info patchlevel]]
testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
-testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]
+testConstraint tip389 [expr {[string length \U010000] == 2}]
# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]
@@ -291,6 +291,9 @@ test string-5.19 {string index, bytearray object out of bounds} {
test string-5.20 {string index, bytearray object out of bounds} {
string index [binary format I* {0x50515253 0x52}] 20
} {}
+test string-5.21 {string index, surrogates, bug [11ae2be95dac9417]} tip389 {
+ list [string index a\U100000b 1] [string index a\U100000b 2] [string index a\U100000b 3]
+} [list \U100000 {} b]
proc largest_int {} {
@@ -1280,7 +1283,7 @@ test string-12.22 {string range, shimmering binary/index} {
binary scan $s a* x
string range $s $s end
} 000000001
-test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} fullutf {
+test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} tip389 {
list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]
} [list \U100000 {} b]
@@ -1477,6 +1480,10 @@ test string-17.7 {string totitle, unicode} {
test string-17.8 {string totitle, compiled} {
lindex [string totitle [list aa bb [list cc]]] 0
} Aa
+test string-17.9 {string totitle, surrogates, bug [11ae2be95dac9417]} tip389 {
+ list [string totitle a\U118c0c 1 1] [string totitle a\U118c0c 2 2] \
+ [string totitle a\U118c0c 3 3]
+} [list a\U118a0c a\U118c0C a\U118c0C]
test string-18.1 {string trim} {
list [catch {string trim} msg] $msg
--
2.19.1