257 lines
7.4 KiB
Diff
257 lines
7.4 KiB
Diff
From 3bdc76e4f35dc2f61ac788d829164367cc63fdb6 Mon Sep 17 00:00:00 2001
|
||
From: =?UTF-8?q?Zolt=C3=A1n=20Herczeg?= <hzmester@freemail.hu>
|
||
Date: Sat, 19 Sep 2020 03:49:32 +0000
|
||
Subject: [PATCH] Fixed a bug in character set matching when JIT is enabled.
|
||
|
||
Conflict:adapt context and delete Changelog
|
||
Reference:https://github.com/PhilipHazel/pcre2/commit/8144ae04e9c295641333f8f0087178e4e11432ec
|
||
---
|
||
src/pcre2_compile.c | 52 +++++++++++++++++++++++++++-------------
|
||
testdata/testinput1 | 9 +++++++
|
||
testdata/testinput11 | 3 +++
|
||
testdata/testinput2 | 2 --
|
||
testdata/testinput9 | 3 +++
|
||
testdata/testoutput1 | 12 ++++++++++
|
||
testdata/testoutput11-16 | 4 ++++
|
||
testdata/testoutput11-32 | 4 ++++
|
||
testdata/testoutput2 | 3 ---
|
||
testdata/testoutput9 | 4 ++++
|
||
10 files changed, 74 insertions(+), 22 deletions(-)
|
||
|
||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||
index d52b5d3..fa95e73 100644
|
||
--- a/src/pcre2_compile.c
|
||
+++ b/src/pcre2_compile.c
|
||
@@ -1398,32 +1398,47 @@ static BOOL
|
||
read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp,
|
||
uint32_t *maxp, int *errorcodeptr)
|
||
{
|
||
-PCRE2_SPTR p = *ptrptr;
|
||
+PCRE2_SPTR p;
|
||
BOOL yield = FALSE;
|
||
+BOOL had_comma = FALSE;
|
||
int32_t min = 0;
|
||
int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */
|
||
|
||
-/* NB read_number() initializes the error code to zero. The only error is for a
|
||
-number that is too big. */
|
||
+/* Check the syntax */
|
||
|
||
+*errorcodeptr = 0;
|
||
+for (p = *ptrptr;; p++)
|
||
+ {
|
||
+ uint32_t c;
|
||
+ if (p >= ptrend) return FALSE;
|
||
+ c = *p;
|
||
+ if (IS_DIGIT(c)) continue;
|
||
+ if (c == CHAR_RIGHT_CURLY_BRACKET) break;
|
||
+ if (c == CHAR_COMMA)
|
||
+ {
|
||
+ if (had_comma) return FALSE;
|
||
+ had_comma = TRUE;
|
||
+ }
|
||
+ else return FALSE;
|
||
+ }
|
||
+
|
||
+/* The only error from read_number() is for a number that is too big. */
|
||
+
|
||
+p = *ptrptr;
|
||
if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr))
|
||
goto EXIT;
|
||
|
||
-if (p >= ptrend) goto EXIT;
|
||
-
|
||
if (*p == CHAR_RIGHT_CURLY_BRACKET)
|
||
{
|
||
p++;
|
||
max = min;
|
||
}
|
||
-
|
||
else
|
||
{
|
||
- if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT;
|
||
- if (*p != CHAR_RIGHT_CURLY_BRACKET)
|
||
+ if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
|
||
{
|
||
if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max,
|
||
- errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET)
|
||
+ errorcodeptr))
|
||
goto EXIT;
|
||
if (max < min)
|
||
{
|
||
@@ -1438,11 +1453,10 @@ yield = TRUE;
|
||
if (minp != NULL) *minp = (uint32_t)min;
|
||
if (maxp != NULL) *maxp = (uint32_t)max;
|
||
|
||
-/* Update the pattern pointer on success, or after an error, but not when
|
||
-the result is "not a repeat quantifier". */
|
||
+/* Update the pattern pointer */
|
||
|
||
EXIT:
|
||
-if (yield || *errorcodeptr != 0) *ptrptr = p;
|
||
+*ptrptr = p;
|
||
return yield;
|
||
}
|
||
|
||
@@ -1776,19 +1790,23 @@ else
|
||
{
|
||
oldptr = ptr;
|
||
ptr--; /* Back to the digit */
|
||
- if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s,
|
||
- errorcodeptr))
|
||
- break;
|
||
|
||
- /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
|
||
+ /* As we know we are at a digit, the only possible error from
|
||
+ read_number() is a number that is too large to be a group number. In this
|
||
+ case we fall through and handle this as not a group reference. If we have
|
||
+ read a small enough number, check for a back reference.
|
||
+
|
||
+ \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
|
||
are octal escapes if there are not that many previous captures. */
|
||
|
||
- if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)
|
||
+ if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) &&
|
||
+ (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount))
|
||
{
|
||
if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61;
|
||
else escape = -s; /* Indicates a back reference */
|
||
break;
|
||
}
|
||
+
|
||
ptr = oldptr; /* Put the pointer back and fall through */
|
||
}
|
||
|
||
diff --git a/testdata/testinput1 b/testdata/testinput1
|
||
index 8d952e2..8409791 100644
|
||
--- a/testdata/testinput1
|
||
+++ b/testdata/testinput1
|
||
@@ -6427,4 +6427,13 @@ ef) x/x,mark
|
||
"(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word"
|
||
word
|
||
|
||
+/a{1,2,3}b/
|
||
+ a{1,2,3}b
|
||
+
|
||
+/\214748364/
|
||
+ >\x{8c}748364<
|
||
+
|
||
+/a{65536/
|
||
+ >a{65536<
|
||
+
|
||
# End of testinput1
|
||
diff --git a/testdata/testinput11 b/testdata/testinput11
|
||
index 2d267d6..2bc8a25 100644
|
||
--- a/testdata/testinput11
|
||
+++ b/testdata/testinput11
|
||
@@ -368,4 +368,7 @@
|
||
ab<61>Az
|
||
ab\x{80000041}z
|
||
|
||
+/(?i:A{1,}\6666666666)/
|
||
+ A\x{1b6}6666666
|
||
+
|
||
# End of testinput11
|
||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||
index 47320eb..695bb60 100644
|
||
--- a/testdata/testinput2
|
||
+++ b/testdata/testinput2
|
||
@@ -2189,8 +2189,6 @@
|
||
|
||
/a(*MARK)b/
|
||
|
||
-/(?i:A{1,}\6666666666)/
|
||
-
|
||
/\g6666666666/
|
||
|
||
/[\g6666666666]/B
|
||
diff --git a/testdata/testinput9 b/testdata/testinput9
|
||
index 7be4b15..4eb228a 100644
|
||
--- a/testdata/testinput9
|
||
+++ b/testdata/testinput9
|
||
@@ -260,4 +260,7 @@
|
||
|
||
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
|
||
|
||
+/(?i:A{1,}\6666666666)/
|
||
+ A\x{1b6}6666666
|
||
+
|
||
# End of testinput9
|
||
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
||
index 470e412..666ae8c 100644
|
||
--- a/testdata/testoutput1
|
||
+++ b/testdata/testoutput1
|
||
@@ -10186,4 +10186,16 @@ No match
|
||
word
|
||
0: word
|
||
|
||
+/a{1,2,3}b/
|
||
+ a{1,2,3}b
|
||
+ 0: a{1,2,3}b
|
||
+
|
||
+/\214748364/
|
||
+ >\x{8c}748364<
|
||
+ 0: \x8c748364
|
||
+
|
||
+/a{65536/
|
||
+ >a{65536<
|
||
+ 0: a{65536
|
||
+
|
||
# End of testinput1
|
||
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
|
||
index 78bf7fb..8768785 100644
|
||
--- a/testdata/testoutput11-16
|
||
+++ b/testdata/testoutput11-16
|
||
@@ -661,4 +661,8 @@ Subject length lower bound = 1
|
||
ab<61>Az
|
||
ab\x{80000041}z
|
||
|
||
+/(?i:A{1,}\6666666666)/
|
||
+ A\x{1b6}6666666
|
||
+ 0: A\x{1b6}6666666
|
||
+
|
||
# End of testinput11
|
||
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
|
||
index 4b00384..2c95f61 100644
|
||
--- a/testdata/testoutput11-32
|
||
+++ b/testdata/testoutput11-32
|
||
@@ -667,4 +667,8 @@ Subject length lower bound = 1
|
||
ab\x{80000041}z
|
||
0: ab\x{80000041}z
|
||
|
||
+/(?i:A{1,}\6666666666)/
|
||
+ A\x{1b6}6666666
|
||
+ 0: A\x{1b6}6666666
|
||
+
|
||
# End of testinput11
|
||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||
index c06363a..0fdbe13 100644
|
||
--- a/testdata/testoutput2
|
||
+++ b/testdata/testoutput2
|
||
@@ -8374,9 +8374,6 @@ No match
|
||
/a(*MARK)b/
|
||
Failed: error 166 at offset 7: (*MARK) must have an argument
|
||
|
||
-/(?i:A{1,}\6666666666)/
|
||
-Failed: error 161 at offset 19: subpattern number is too big
|
||
-
|
||
/\g6666666666/
|
||
Failed: error 161 at offset 7: subpattern number is too big
|
||
|
||
diff --git a/testdata/testoutput9 b/testdata/testoutput9
|
||
index f98f276..1ec4317 100644
|
||
--- a/testdata/testoutput9
|
||
+++ b/testdata/testoutput9
|
||
@@ -367,4 +367,8 @@ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is to
|
||
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
|
||
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||
|
||
+/(?i:A{1,}\6666666666)/
|
||
+Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
|
||
+ A\x{1b6}6666666
|
||
+
|
||
# End of testinput9
|
||
--
|
||
2.27.0
|
||
|