Compare commits

...

10 Commits

Author SHA1 Message Date
openeuler-ci-bot
6f8a47c429
!125 sync patches from upstream
From: @hugel 
Reviewed-by: @xujing99 
Signed-off-by: @xujing99
2024-12-04 06:33:28 +00:00
hugel
9ce133a073 sync patches from upstream 2024-12-04 11:55:36 +08:00
openeuler-ci-bot
ee47835a6b
!94 pcre2: Fixing an issue using empty character sets in jit
From: @xujing99 
Reviewed-by: @licunlong 
Signed-off-by: @licunlong
2024-05-07 12:14:17 +00:00
xujing
93c4200dca pcre2: Fixing an issue using empty character sets in jit 2024-05-07 16:48:14 +08:00
openeuler-ci-bot
340be1f0d1
!84 pcre2: sync patches from upstream
From: @xujing99 
Reviewed-by: @openeuler-basic 
Signed-off-by: @openeuler-basic
2024-02-06 02:41:37 +00:00
xujing
c7467220f6 pcre2: sync patches from upstream 2024-02-06 10:24:12 +08:00
openeuler-ci-bot
1931ddf097
!76 [sync] PR-75: pcre2: sync patches from upstream to fix some bugs
From: @openeuler-sync-bot 
Reviewed-by: @licunlong 
Signed-off-by: @licunlong
2024-01-22 12:34:43 +00:00
xujing
b9e38fb9f7 pcre2: sync patches from upstream to fix some bugs
(cherry picked from commit ac715acc044f50e935dc03cc6d58bb1577b6a0f3)
2024-01-22 19:06:06 +08:00
openeuler-ci-bot
7663d30fb3
!62 [sync] PR-60: fix a possible integer overflow in DFA matching (#305)
From: @openeuler-sync-bot 
Reviewed-by: @openeuler-basic 
Signed-off-by: @openeuler-basic
2023-12-14 07:27:59 +00:00
xujing
76141f2f97 fix a possible integer overflow in DFA matching (#305)
(cherry picked from commit 2a38eb8cf0ea512b022a3223debb76724d3a22d2)
2023-12-14 12:46:02 +08:00
23 changed files with 2374 additions and 1 deletions

View File

@ -0,0 +1,69 @@
From f334e76dc765f23670e957413bae18c9d20b1d82 Mon Sep 17 00:00:00 2001
From: Nicholas Wilson <nicholas@nicholaswilson.me.uk>
Date: Mon, 16 Sep 2024 17:38:40 +0100
Subject: [PATCH] Add Perl titlecasing (#475)
---
src/pcre2_substitute.c | 11 +++++++++++
testdata/testinput2 | 3 +++
testdata/testoutput2 | 4 ++++
3 files changed, 18 insertions(+)
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 1ccef0660..83ddb8364 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -839,6 +839,12 @@ do
forcecase = -1;
forcecasereset = 0;
ptr += 2;
+ if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_U)
+ {
+ /* Perl title-casing feature for \l\U (and \u\L) */
+ forcecasereset = 1;
+ ptr += 2;
+ }
continue;
case CHAR_U:
@@ -850,6 +856,11 @@ do
forcecase = 1;
forcecasereset = 0;
ptr += 2;
+ if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_L)
+ {
+ forcecasereset = -1;
+ ptr += 2;
+ }
continue;
default:
diff --git a/testdata/testinput2 b/testdata/testinput2
index 51e2095c8..7a836c994 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4612,6 +4612,9 @@ B)x/alt_verbnames,mark
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
abcDE
+/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended
+ Hello between wORLD
+
/abcd/replace=xy\kz,substitute_extended
abcd
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index eeb635d6d..7c71866b7 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14854,6 +14854,10 @@ No match
abcDE
1: aBcBCbcdEdeabAByzDone
+/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended
+ Hello between wORLD
+ 2: >hELLO< between >World<
+
/abcd/replace=xy\kz,substitute_extended
abcd
Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string

View File

@ -0,0 +1,119 @@
From c1306126c3f12c16ad62dd2553132f64a28ca607 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Sun, 19 Nov 2023 17:18:07 +0000
Subject: [PATCH] Fix 32-bit quantifier following a character larger than the
maximum UTF character.
Conflict:don't modify ChangeLog; adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/c1306126c3f12c16ad62dd2553132f64a28ca607
---
src/pcre2_compile.c | 11 ++++++++---
testdata/testinput12 | 6 ++++++
testdata/testoutput12-16 | 7 +++++++
testdata/testoutput12-32 | 7 +++++++
4 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index b3e4969..fdaf2ad 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2781,6 +2781,7 @@ uint32_t *verbstartptr = NULL;
uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
+uint32_t *this_parsed_item = NULL;
uint32_t meta_quantifier = 0;
uint32_t add_after_mark = 0;
uint32_t extra_options = cb->cx->extra_options;
@@ -2866,10 +2867,11 @@ while (ptr < ptrend)
uint32_t set, unset, *optset;
uint32_t terminator;
uint32_t prev_meta_quantifier;
+ uint32_t *prev_parsed_item = this_parsed_item;
BOOL prev_okquantifier;
PCRE2_SPTR tempptr;
PCRE2_SIZE offset;
-
+
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
@@ -2881,6 +2883,10 @@ while (ptr < ptrend)
errorcode = ERR19;
goto FAILED; /* Parentheses too deeply nested */
}
+
+ /* Remember where this item started */
+
+ this_parsed_item = parsed_pattern;
/* Get next input character, save its position for callout handling. */
@@ -3173,7 +3179,6 @@ while (ptr < ptrend)
continue; /* Next character in pattern */
}
-
/* Process the next item in the main part of a pattern. */
switch(c)
@@ -3450,7 +3455,7 @@ while (ptr < ptrend)
wrapping it in non-capturing brackets, but we have to allow for a preceding
(*MARK) for when (*ACCEPT) has an argument. */
- if (parsed_pattern[-1] == META_ACCEPT)
+ if (*prev_parsed_item == META_ACCEPT)
{
uint32_t *p;
for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0];
diff --git a/testdata/testinput12 b/testdata/testinput12
index 7a85eb5..1e552e6 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -560,4 +560,10 @@
# ----------------------------------------------------
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+ \x{802a0000}\x{802a0000}
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 9867632..8cbc13d 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1803,4 +1803,11 @@ No match
# ----------------------------------------------------
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
+ \x{802a0000}\x{802a0000}
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 3a20dd4..1a98b4b 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1801,4 +1801,11 @@ No match
# ----------------------------------------------------
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+ \x{802a0000}\x{802a0000}
+ 0: \x{802a0000}\x{802a0000}
+
# End of testinput12
--
2.23.0

View File

@ -0,0 +1,94 @@
From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 22 Nov 2023 10:22:59 +0000
Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT
Conflict:don't modify ChangeLog; adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/45dcb3de900b77583f4e9daa663004c55fad4794
---
src/pcre2_jit_compile.c | 6 +++---
testdata/testinput12 | 4 ++++
testdata/testoutput12-16 | 9 +++++++++
testdata/testoutput12-32 | 5 +++++
4 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 510c392..8d64e1c 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -8718,7 +8718,7 @@ c = *cc++;
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
- return NULL;
+ return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
lgb = UCD_GRAPHBREAK(c);
@@ -8958,7 +8958,7 @@ switch(type)
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
- if (!common->utf || common->invalid_utf)
+ if (common->invalid_utf)
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif
@@ -12044,7 +12044,7 @@ switch(opcode)
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
+ if (type == OP_EXTUNI || common->utf)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
detect_partial_match(common, &no_match);
diff --git a/testdata/testinput12 b/testdata/testinput12
index 5a2d8d2..a6678bb 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -569,4 +569,8 @@
/\x{802a0000}*/
\x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 9ac403e..f3b40a3 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1814,4 +1814,13 @@ No match
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
\x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+ 0: a\x00\x{ffff}
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 9396305..dd42f86 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1812,4 +1812,9 @@ No match
\x{802a0000}\x{802a0000}
0: \x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+ 0: a\x{110000}\x{ffffffff}
+
# End of testinput12
--
2.23.0

View File

@ -0,0 +1,78 @@
From 1c09efe6b0008a3b463299efe7501bc3140806f3 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 6 Dec 2023 10:06:50 +0000
Subject: [PATCH] Fix accept and endanchored interaction in JIT
Conflict:don't modify ChangeLog
Reference:https://github.com/PCRE2Project/pcre2/commit/1c09efe6b0008a3b463299efe7501bc3140806f3
---
src/pcre2_jit_compile.c | 15 ++++++++++++---
src/pcre2_jit_test.c | 1 +
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 2e11c3c..849e2c8 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -489,6 +489,8 @@ typedef struct compiler_common {
jump_list *casefulcmp;
jump_list *caselesscmp;
jump_list *reset_match;
+ /* Same as reset_match, but resets the STR_PTR as well. */
+ jump_list *restart_match;
BOOL unset_backref;
BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
@@ -3146,7 +3148,7 @@ return (value & (value - 1)) == 0;
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
-while (list)
+while (list != NULL)
{
/* sljit_set_label is clever enough to do nothing
if either the jump or the label is NULL. */
@@ -12187,7 +12189,7 @@ if (*cc == OP_FAIL)
}
if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+ add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
{
@@ -14552,10 +14554,17 @@ if (common->caselesscmp != NULL)
set_jumps(common->caselesscmp, LABEL());
do_caselesscmp(common);
}
-if (common->reset_match != NULL)
+if (common->reset_match != NULL || common->restart_match != NULL)
{
+ if (common->restart_match != NULL)
+ {
+ set_jumps(common->restart_match, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+ }
+
set_jumps(common->reset_match, LABEL());
do_reset_match(common, (re->top_bracket + 1) * 2);
+ /* The value of restart_match is in TMP1. */
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
JUMPTO(SLJIT_JUMP, reset_match_label);
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index b5d95d5..0974d19 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -655,6 +655,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
+ { MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
/* Conditional blocks. */
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
--
2.23.0

View File

@ -0,0 +1,31 @@
From 04f6668a09c51cf10fa5514019843ab0af9724c8 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Tue, 21 Nov 2023 15:10:34 +0000
Subject: [PATCH] Fix another oversight in c1306126
Conflict:NA
Reference:https://github.com/PCRE2Project/pcre2/commit/04f6668a09c51cf10fa5514019843ab0af9724c8
---
src/pcre2_compile.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 9e45580..7b522c5 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -3108,8 +3108,11 @@ while (ptr < ptrend)
!read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode))))
{
if (after_manual_callout-- <= 0)
+ {
parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout,
parsed_pattern, cb);
+ this_parsed_item = parsed_pattern; /* New start for current item */
+ }
}
/* If expect_cond_assert is 2, we have just passed (?( and are expecting an
--
2.23.0

View File

@ -0,0 +1,46 @@
From 936fef2a4480b21f5c43b207181097736fb311e3 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 22 Nov 2023 11:50:38 +0000
Subject: [PATCH] Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set
in JIT
Conflict:don't modify ChangeLog
Reference:https://github.com/PCRE2Project/pcre2/commit/936fef2a4480b21f5c43b207181097736fb311e3
---
src/pcre2_jit_compile.c | 4 +++-
src/pcre2_jit_test.c | 1 +
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 8d64e1c..8110d8c 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -9539,9 +9539,11 @@ if (!minimize)
if (ref)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+
if (ref)
{
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+ if (!common->unset_backref)
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
}
else
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index b27cec7..8bff3dc 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -595,6 +595,7 @@ static struct regression_test_case regression_test_cases[] = {
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
/* Assertions. */
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
--
2.23.0

View File

@ -0,0 +1,44 @@
From 9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Thu, 7 Dec 2023 09:03:24 +0000
Subject: [PATCH] Fix backreferences with unset backref and non-greedy
iterators in JIT
Conflict:don't modify ChangeLog; modify topbacktracks instead of
own_backtracks because c3529d0227e is not merged
Reference:https://github.com/PCRE2Project/pcre2/commit/9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9
---
src/pcre2_jit_compile.c | 3 ++-
src/pcre2_jit_test.c | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 0f445e1..e1daa1e 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -9653,7 +9653,8 @@ else
{
if (ref)
{
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+ if (!common->unset_backref)
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
}
else
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index 0974d19..9b63c8e 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -596,6 +596,7 @@ static struct regression_test_case regression_test_cases[] = {
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
/* Assertions. */
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
--
2.23.0

View File

@ -0,0 +1,90 @@
From 57ee073252dc826dbe412846a83421d2bb4483bc Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 22 Nov 2023 11:34:27 +0000
Subject: [PATCH] Fix bad patch in 05206d66. The interpreter was handling
NOTEOL incorrectly in general after trying to fix it in invalid UTF subjects.
Conflict:adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/57ee073252dc826dbe412846a83421d2bb4483bc
---
src/pcre2_intmodedep.h | 3 ++-
src/pcre2_match.c | 7 +++----
testdata/testinput2 | 4 ++++
testdata/testoutput2 | 6 ++++++
4 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
index 5e7e10d..423764d 100644
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@@ -880,7 +880,8 @@ typedef struct match_block {
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR check_subject; /* Where UTF-checked from */
- PCRE2_SPTR end_subject; /* End of the subject string */
+ PCRE2_SPTR end_subject; /* Usable end of the subject string */
+ PCRE2_SPTR true_end_subject; /* Actual end of the subject string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index ea03976..c5e84ce 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6076,12 +6076,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
/* Fall through */
- /* Unconditional end of subject assertion (\z). We must check NOTEOL
- because it gets set for invalid UTF fragments. */
+ /* Unconditional end of subject assertion (\z). */
case OP_EOD:
- if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
- RRETURN(MATCH_NOMATCH);
+ if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
if (mb->partial != 0)
{
mb->hitend = TRUE;
@@ -6891,6 +6889,7 @@ mb->callout_data = mcontext->callout_data;
mb->start_subject = subject;
mb->start_offset = start_offset;
mb->end_subject = end_subject;
+mb->true_end_subject = true_end_subject;
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
mb->allowemptypartial = (re->max_lookbehind > 0) ||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 0e24e78..b874f20 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -6055,4 +6055,8 @@ a)"xI
/A +/extended
+/a\z/
+ a
+ a\=noteol
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 68800fb..c1bc0e6 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17946,6 +17946,12 @@ No match
/A +/extended
+/a\z/
+ a
+ 0: a
+ a\=noteol
+ 0: a
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.23.0

View File

@ -0,0 +1,80 @@
From afce00e484cff118a824dac498e8044680dac401 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Fri, 1 Dec 2023 16:49:59 +0000
Subject: [PATCH] Fix compile loop in 32-bit mode for characters above the
Unicode limit when caseless and ucp are set.
Conflict:don't modify ChangeLog; adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/afce00e484cff118a824dac498e8044680dac401
---
src/pcre2_compile.c | 6 +++++-
testdata/testinput12 | 4 ++++
testdata/testoutput12-16 | 5 +++++
testdata/testoutput12-32 | 5 +++++
4 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 7b522c5..1935e76 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -5155,10 +5155,14 @@ unsigned int co;
unsigned int co;
/* Find the first character that has an other case. If it has multiple other
-cases, return its case offset value. */
+cases, return its case offset value. In 32-bit mode, a value
+greater than the Unicode maximum ends the range. */
for (c = *cptr; c <= d; c++)
{
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT) return -1;
+#endif
if ((co = UCD_CASESET(c)) != 0)
{
*ocptr = c++; /* Character that has the set */
diff --git a/testdata/testinput12 b/testdata/testinput12
index a6678bb..de3d406 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -573,4 +573,8 @@
/\X++/
a\x{110000}\x{ffffffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+ \x{ffffffff}xyz
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index f3b40a3..9fa93fa 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
** Truncation will probably give the wrong result.
0: a\x00\x{ffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}xyz
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index dd42f86..721d8bc 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1817,4 +1817,9 @@ No match
a\x{110000}\x{ffffffff}
0: a\x{110000}\x{ffffffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+ \x{ffffffff}xyz
+ 0: \x{ffffffff}
+
# End of testinput12
--
2.23.0

View File

@ -0,0 +1,65 @@
From 542cb11242cfc9be9b6218965751bfbb13a8b6a2 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 27 Dec 2023 08:27:17 +0000
Subject: [PATCH] Fix incorrect class character matches in JIT
---
src/pcre2_jit_compile.c | 8 ++++++++
src/pcre2_jit_test.c | 1 +
2 files changed, 9 insertions(+)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 160b448..6d0d9a6 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -7938,6 +7938,7 @@ while (*cc != XCL_END)
jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+ c = charoffset;
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
@@ -7949,6 +7950,9 @@ while (*cc != XCL_END)
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ /* Restore charoffset */
+ SET_CHAR_OFFSET(c);
+
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
break;
@@ -7964,6 +7968,7 @@ while (*cc != XCL_END)
jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+ c = charoffset;
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
@@ -7972,6 +7977,9 @@ while (*cc != XCL_END)
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ /* Restore charoffset */
+ SET_CHAR_OFFSET(c);
+
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
break;
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index 801929b..f6646b7 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -412,6 +412,7 @@ static struct regression_test_case regression_test_cases[] = {
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
{ MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
+ { MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
/* Possible empty brackets. */
{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
--
2.33.0

View File

@ -0,0 +1,461 @@
From ad73148dfb6d06280a4d87f322991762aff90a55 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Mon, 4 Dec 2023 16:11:41 +0000
Subject: [PATCH] Fix incorrect matching of 0xffffffff to any character with
more than one other case in 32-bit UCP (but not UTF) mode.
Conflict:don't modify ChangeLog; use "Lctype == OP_NOTPROP" instead of
"notmatch" because 92d7cf1dd04 is not merged
Reference:https://github.com/PCRE2Project/pcre2/commit/ad73148dfb6d06280a4d87f322991762aff90a55
---
src/pcre2_dfa_match.c | 28 ++++++++++++++++++++++++++
src/pcre2_match.c | 43 ++++++++++++++++++++++++++++++++++------
testdata/testinput12 | 26 ++++++++++++++++++++++++
testdata/testinput14 | 27 +++++++++++++++++++++++++
testdata/testoutput12-16 | 37 ++++++++++++++++++++++++++++++++++
testdata/testoutput12-32 | 33 ++++++++++++++++++++++++++++++
testdata/testoutput14-16 | 38 +++++++++++++++++++++++++++++++++++
testdata/testoutput14-32 | 34 +++++++++++++++++++++++++++++++
testdata/testoutput14-8 | 38 +++++++++++++++++++++++++++++++++++
9 files changed, 298 insertions(+), 6 deletions(-)
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 1c48ad6..caae652 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -1241,6 +1241,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[2];
for (;;)
{
@@ -1516,6 +1523,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[3];
for (;;)
{
@@ -1774,6 +1788,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[3];
for (;;)
{
@@ -2058,6 +2079,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
for (;;)
{
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index d162e70..b2e1f23 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (Fop == OP_NOTPROP) break; /* skip the caseless-set scan below */
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Fecode[2];
for (;;)
{
@@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (Lctype == OP_NOTPROP) continue; /* repeated type: test Lctype, not Fop */
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
@@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (Lctype == OP_NOTPROP) continue;
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
@@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
break;
}
GETCHARLENTEST(fc, Feptr, len);
- cp = PRIV(ucd_caseless_sets) + Lpropvalue;
- for (;;)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
{
- if (fc < *cp)
- { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
- if (fc == *cp++)
- { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
+ if (Lctype != OP_NOTPROP) goto GOT_MAX;
}
+ else
+#endif
+ {
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+ for (;;)
+ {
+ if (fc < *cp)
+ { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
+ if (fc == *cp++)
+ { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
+ }
+ }
+
Feptr += len;
}
GOT_MAX:
diff --git a/testdata/testinput12 b/testdata/testinput12
index de3d406..85550c3 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -576,5 +576,31 @@
# This used to loop in 32-bit mode; it will fail in 16-bit mode.
/[\x{ffffffff}]/caseless,ucp
\x{ffffffff}xyz
+
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+
+# ---------------------------------------------------------
# End of testinput12
diff --git a/testdata/testinput14 b/testdata/testinput14
index 8a17ae7..8880b5c 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -78,4 +78,31 @@
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+
+# ----------------------------------------------------
+
# End of testinput14
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 9fa93fa..616d693 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
/[\x{ffffffff}]/caseless,ucp
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
\x{ffffffff}xyz
+
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ---------------------------------------------------------
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 721d8bc..3c9586e 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1821,5 +1821,38 @@ No match
/[\x{ffffffff}]/caseless,ucp
\x{ffffffff}xyz
0: \x{ffffffff}
+
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+ 0: \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+No match
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+No match
+
+# ---------------------------------------------------------
# End of testinput12
diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16
index 61541f6..dd1a977 100644
--- a/testdata/testoutput14-16
+++ b/testdata/testoutput14-16
@@ -122,4 +122,42 @@ No match
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ----------------------------------------------------
+
# End of testinput14
diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32
index f1f65b7..dc21569 100644
--- a/testdata/testoutput14-32
+++ b/testdata/testoutput14-32
@@ -122,4 +122,38 @@ No match
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+ 0: \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+No match
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+No match
+
+# ----------------------------------------------------
+
# End of testinput14
diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8
index aa62414..69285db 100644
--- a/testdata/testoutput14-8
+++ b/testdata/testoutput14-8
@@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ----------------------------------------------------
+
# End of testinput14
--
2.23.0

View File

@ -0,0 +1,97 @@
From 7fe586b892c9e0cbf3b21d57cfd8135e2311e45c Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Mon, 20 Nov 2023 15:41:06 +0000
Subject: [PATCH] Fix incorrect patch in c1306126
Conflict:adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/7fe586b892c9e0cbf3b21d57cfd8135e2311e45c
---
src/pcre2_compile.c | 20 ++++++++++++++------
testdata/testinput2 | 2 ++
testdata/testoutput2 | 2 ++
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index fdaf2ad..9e45580 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2782,6 +2782,7 @@ uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
uint32_t *this_parsed_item = NULL;
+uint32_t *prev_parsed_item = NULL;
uint32_t meta_quantifier = 0;
uint32_t add_after_mark = 0;
uint32_t extra_options = cb->cx->extra_options;
@@ -2867,11 +2868,10 @@ while (ptr < ptrend)
uint32_t set, unset, *optset;
uint32_t terminator;
uint32_t prev_meta_quantifier;
- uint32_t *prev_parsed_item = this_parsed_item;
BOOL prev_okquantifier;
PCRE2_SPTR tempptr;
PCRE2_SIZE offset;
-
+
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
@@ -2883,10 +2883,17 @@ while (ptr < ptrend)
errorcode = ERR19;
goto FAILED; /* Parentheses too deeply nested */
}
-
- /* Remember where this item started */
- this_parsed_item = parsed_pattern;
+ /* If the last time round this loop something was added, parsed_pattern will
+ no longer be equal to this_parsed_item. Remember where the previous item
+ started and reset for the next item. Note that sometimes round the loop,
+ nothing gets added (e.g. for ignored white space). */
+
+ if (this_parsed_item != parsed_pattern)
+ {
+ prev_parsed_item = this_parsed_item;
+ this_parsed_item = parsed_pattern;
+ }
/* Get next input character, save its position for callout handling. */
@@ -3440,7 +3447,8 @@ while (ptr < ptrend)
/* ---- Quantifier post-processing ---- */
- /* Check that a quantifier is allowed after the previous item. */
+ /* Check that a quantifier is allowed after the previous item. This
+ guarantees that there is a previous item. */
CHECK_QUANTIFIER:
if (!prev_okquantifier)
diff --git a/testdata/testinput2 b/testdata/testinput2
index ba292d8..da845c1 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -6051,4 +6051,6 @@ a)"xI
--
\[X]{-10}
+/A +/extended
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 888f06a..85de4ae 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17932,6 +17932,8 @@ No match
\[X]{-10}
** Zero or negative repeat not allowed
+/A +/extended
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.23.0

View File

@ -0,0 +1,65 @@
From 829414f8e549fe7e4b1a6696ca70664e89e5e7f0 Mon Sep 17 00:00:00 2001
From: Nicholas Wilson <niwilson@microsoft.com>
Date: Wed, 18 Sep 2024 16:39:22 +0100
Subject: [PATCH] Fix incorrect positive error code from pcre2_substitute()
(#481)
---
src/pcre2_substitute.c | 4 +++-
testdata/testinput2 | 6 ++++++
testdata/testoutput2 | 10 ++++++++++
3 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 86c1d1e69..862ea9f73 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -134,7 +134,9 @@ for (; ptr < ptrend; ptr++)
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
- rc = errorcode;
+ /* errorcode from check_escape is positive, so must not be returned by
+ pcre2_substitute(). */
+ rc = PCRE2_ERROR_BADREPESCAPE;
goto EXIT;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index c2abdb890..8be78ff50 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4201,6 +4201,12 @@
123abc123\=substitute_overflow_length,replace=[1]x$1z
123abc123\=substitute_overflow_length,replace=[0]x$1z
+/a(b)c/substitute_extended
+ ZabcZ\=replace=>${1:+ yes : no }
+ ZabcZ\=replace=>${1:+ \o{100} : \o{100} }
+ ZabcZ\=replace=>${1:+ \o{Z} : no }
+ ZabcZ\=replace=>${1:+ yes : \o{Z} }
+
"((?=(?(?=(?(?=(?(?=()))))))))"
a
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 7a582cd23..ccf209b5c 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -13818,6 +13818,16 @@ Failed: error -48: no more memory: 10 code units are needed
123abc123\=substitute_overflow_length,replace=[0]x$1z
Failed: error -48: no more memory: 10 code units are needed
+/a(b)c/substitute_extended
+ ZabcZ\=replace=>${1:+ yes : no }
+ 1: Z> yes Z
+ ZabcZ\=replace=>${1:+ \o{100} : \o{100} }
+ 1: Z> @ Z
+ ZabcZ\=replace=>${1:+ \o{Z} : no }
+Failed: error -57 at offset 9 in replacement: bad escape sequence in replacement string
+ ZabcZ\=replace=>${1:+ yes : \o{Z} }
+Failed: error -57 at offset 15 in replacement: bad escape sequence in replacement string
+
"((?=(?(?=(?(?=(?(?=()))))))))"
a
0:

View File

@ -0,0 +1,263 @@
From d29e729000a3724e2aebaa64318dfd7530a55370 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 4 Sep 2024 16:18:35 +0100
Subject: [PATCH] Fix non-recognition of some octal escapes in substitute
replacement strings
---
src/pcre2_compile.c | 15 ++++++++-------
src/pcre2_substitute.c | 4 ++--
testdata/testinput11 | 6 ++++++
testdata/testinput2 | 12 ++++++++++++
testdata/testinput5 | 3 +++
testdata/testinput9 | 8 ++++++++
testdata/testoutput11-16 | 8 ++++++++
testdata/testoutput11-32 | 8 ++++++++
testdata/testoutput2 | 16 ++++++++++++++++
testdata/testoutput5 | 4 ++++
testdata/testoutput9 | 10 ++++++++++
11 files changed, 85 insertions(+), 9 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index ad2baf8..80a1a48 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1480,8 +1480,8 @@ final code unit of the escape sequence.
This function is also called from pcre2_substitute() to handle escape sequences
in replacement strings. In this case, the cb argument is NULL, and in the case
of escapes that have further processing, only sequences that define a data
-character are recognised. The isclass argument is not relevant; the options
-argument is the final value of the compiled pattern's options.
+character are recognised. The options argument is the final value of the
+compiled pattern's options.
Arguments:
ptrptr points to the input position pointer
@@ -1489,7 +1489,7 @@ Arguments:
chptr points to a returned data character
errorcodeptr points to the errorcode variable (containing zero)
options the current options bits
- isclass TRUE if inside a character class
+ isclassorsub TRUE if in a character class or called from pcre2_substitute()
cb compile data block or NULL when called from pcre2_substitute()
Returns: zero => a data character
@@ -1500,7 +1500,7 @@ Returns: zero => a data character
int
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
- int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass,
+ int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclassorsub,
compile_block *cb)
{
BOOL utf = (options & PCRE2_UTF) != 0;
@@ -1607,7 +1607,8 @@ else
if (cb == NULL)
{
- if (c != CHAR_c && c != CHAR_o && c != CHAR_x)
+ if (c < CHAR_0 ||
+ (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x)))
{
*errorcodeptr = ERR3;
return 0;
@@ -1719,7 +1720,7 @@ else
*/
case CHAR_g:
- if (isclass) break;
+ if (isclassorsub) break;
if (ptr >= ptrend)
{
@@ -1791,7 +1792,7 @@ else
case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
- if (!isclass)
+ if (!isclassorsub)
{
oldptr = ptr;
ptr--; /* Back to the digit */
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index d1f17eb05..1ccef0660 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -130,7 +130,7 @@ for (; ptr < ptrend; ptr++)
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
@@ -858,7 +858,7 @@ do
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
diff --git a/testdata/testinput11 b/testdata/testinput11
index 2bc8a25e3..69aea351b 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -371,4 +371,10 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput11
diff --git a/testdata/testinput2 b/testdata/testinput2
index 7d8dfc149..51e2095c8 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4668,6 +4668,18 @@ B)x/alt_verbnames,mark
/abcd/g
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
+/abc/substitute_extended,replace=>\045<
+ abc
+
+/abc/substitute_extended,replace=>\45<
+ abc
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+
+/abc/substitute_extended,replace=>\845<
+ abc
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
/((p(?'K/
diff --git a/testdata/testinput5 b/testdata/testinput5
index 9126236..da2830d 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2188,4 +2188,7 @@
/(\xc1)\1/i,ucp
\xc1\xe1\=no_jit
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+
# End of testinput5
diff --git a/testdata/testinput9 b/testdata/testinput9
index 4eb228afe..f2f50033f 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -263,4 +263,12 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput9
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index f70d89ee9..806f6b3e0 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -665,4 +665,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 961c4cd05..c5f5c8a42 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -671,4 +671,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 1cffe6a36..eeb635d6d 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14934,6 +14934,22 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
2: >wxyz1234wxyz5678<
+/abc/substitute_extended,replace=>\045<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\45<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\845<
+ abc
+ 1: >845<
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
Capture group count = 2
Max back reference = 1
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index b1842df..24d849c 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -4949,4 +4949,8 @@ Subject length lower bound = 3
0: \xc1\xe1
1: \xc1
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
# End of testinput5
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 3613703e0..8556c9e14 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -371,4 +371,14 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string
+
# End of testinput9

View File

@ -0,0 +1,29 @@
From b88126f42382fa470b6480f82489303d4311ce18 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Thu, 16 Nov 2023 13:49:49 +0000
Subject: [PATCH] Fix oversight in DFA when changing OP_REVERSE; also add some
unrelated tests
Conflict:don't add unrelated tests
Reference:https://github.com/PCRE2Project/pcre2/commit/b88126f42382fa470b6480f82489303d4311ce18
---
src/pcre2_dfa_match.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index e90c984..5768407 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -591,7 +591,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
end_code = this_start_code;
do
{
- size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
+ size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
if (back > max_back) max_back = back;
end_code += GET(end_code, 1);
}
--
2.23.0

View File

@ -0,0 +1,108 @@
From 05206d66340341bef7a673108a855f594c148950 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Sun, 19 Nov 2023 18:32:10 +0000
Subject: [PATCH] Fix \z behaviour when matching within invalid UTF
Conflict:don't modify ChangeLog; adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/05206d66340341bef7a673108a855f594c148950
---
src/pcre2_match.c | 6 ++++--
testdata/testinput10 | 3 +++
testdata/testinput12 | 3 +++
testdata/testoutput10 | 4 ++++
testdata/testoutput12-16 | 4 ++++
testdata/testoutput12-32 | 4 ++++
6 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 2dcf8c4..ea03976 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
/* Fall through */
- /* Unconditional end of subject assertion (\z) */
+ /* Unconditional end of subject assertion (\z). We must check NOTEOL
+ because it gets set for invalid UTF fragments. */
case OP_EOD:
- if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
+ if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
+ RRETURN(MATCH_NOMATCH);
if (mb->partial != 0)
{
mb->hitend = TRUE;
diff --git a/testdata/testinput10 b/testdata/testinput10
index c7618b1..e901d51 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -642,4 +642,7 @@
qchq\=ph
qchq\=ps
+/A\z/utf,match_invalid_utf
+ A\x80\x42\n
+
# End of testinput10
diff --git a/testdata/testinput12 b/testdata/testinput12
index 1e552e6..5a2d8d2 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -464,6 +464,9 @@
/aa/utf,ucp,match_invalid_utf,global
\x{d800}aa
+
+/A\z/utf,match_invalid_utf
+ A\x{df00}\n
# ----------------------------------------------------
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 18dd9d2..8145891 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1921,4 +1921,8 @@ Partial match:
qchq\=ps
Partial match:
+/A\z/utf,match_invalid_utf
+ A\x80\x42\n
+No match
+
# End of testinput10
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 8cbc13d..9ac403e 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1607,6 +1607,10 @@ No match
/aa/utf,ucp,match_invalid_utf,global
\x{d800}aa
0: aa
+
+/A\z/utf,match_invalid_utf
+ A\x{df00}\n
+No match
# ----------------------------------------------------
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 1a98b4b..9396305 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1605,6 +1605,10 @@ No match
/aa/utf,ucp,match_invalid_utf,global
\x{d800}aa
0: aa
+
+/A\z/utf,match_invalid_utf
+ A\x{df00}\n
+No match
# ----------------------------------------------------
--
2.23.0

View File

@ -0,0 +1,74 @@
From 994536c96fa571bcfd9232001e73b78c6afb9e67 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Thu, 21 Mar 2024 07:33:17 +0000
Subject: [PATCH] Fixing an issue using empty character sets in jit
Conflict:adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/994536c96fa571bcfd9232001e73b78c6afb9e67
---
src/pcre2_jit_compile.c | 23 ++++++++++++++++-------
src/pcre2_jit_test.c | 1 +
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index afff36a..c19723b 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -6186,25 +6186,34 @@ if (max < 1)
/* Convert last_count to priority. */
for (i = 0; i < max; i++)
{
- SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
+ SLJIT_ASSERT(chars[i].last_count <= chars[i].count);
- if (chars[i].count == 1)
+ switch (chars[i].count)
{
+ case 0:
+ chars[i].count = 255;
+ chars[i].last_count = 0;
+ break;
+
+ case 1:
chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
/* Simplifies algorithms later. */
chars[i].chars[1] = chars[i].chars[0];
- }
- else if (chars[i].count == 2)
- {
+ break;
+
+ case 2:
SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
else
chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
- }
- else
+ break;
+
+ default:
chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
+ break;
+ }
}
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index f051bd2..6d95bb9 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -396,6 +396,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
{ CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
+ { M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
/* Unicode properties. */
{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
--
2.23.0

View File

@ -0,0 +1,233 @@
From ef218fbba60bfe5b0a8ac9ea4445eac5fb0847e5 Mon Sep 17 00:00:00 2001
From: Alex Dowad <alexinbeijing@gmail.com>
Date: Sat, 7 Sep 2024 00:16:03 +0900
Subject: [PATCH] Guard against out-of-bounds memory access when parsing
LIMIT_HEAP et al (#463)
Patterns passed to pcre2_compile are not guaranteed to be
null-terminated. Also, it can happen that there is an invalid
pattern like this:
(*LIMIT_HEAP=123
If the next byte of memory after the end of the pattern happens
to be a digit, it will be parsed as part of the limit value. Or,
if the next byte is a right parenthesis character, it will be taken
as the end of the (*LIMIT_HEAP=nnn) construct.
This will result in `skipatstart` being larger than `patlen`, which
will result in underflow and an erroneous call to malloc requesting
a huge number of bytes.
---
src/pcre2_compile.c | 7 ++-
src/pcre2_internal.h | 3 +
src/pcre2_util.h | 132 ++++++++++++++++++++++++++++++++++++++++++
testdata/testoutput15 | 4 +-
4 files changed, 141 insertions(+), 5 deletions(-)
create mode 100644 src/pcre2_util.h
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index e6843bb13..410f220b3 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -10552,12 +10552,12 @@ if ((options & PCRE2_LITERAL) == 0)
ptr += pp;
goto HAD_EARLY_ERROR;
}
- while (IS_DIGIT(ptr[pp]))
+ while (pp < patlen && IS_DIGIT(ptr[pp]))
{
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
c = c*10 + (ptr[pp++] - CHAR_0);
}
- if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
+ if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR60;
ptr += pp;
@@ -10566,7 +10566,7 @@ if ((options & PCRE2_LITERAL) == 0)
if (p->type == PSO_LIMH) limit_heap = c;
else if (p->type == PSO_LIMM) limit_match = c;
else limit_depth = c;
- skipatstart += pp - skipatstart;
+ skipatstart = ++pp;
break;
}
break; /* Out of the table scan loop */
@@ -10574,6 +10574,7 @@ if ((options & PCRE2_LITERAL) == 0)
}
if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
}
+ PCRE2_ASSERT(skipatstart <= patlen);
}
/* End of pattern-start options; advance to start of real regex. */
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index d8fad1e..edb36ca 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1999,6 +1999,9 @@ extern void * _pcre2_memmove(void *, const void *, size_t);
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH */
+
+#include "pcre2_util.h"
+
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
/* End of pcre2_internal.h */
diff --git a/src/pcre2_util.h b/src/pcre2_util.h
new file mode 100644
index 0000000..ea86355
--- /dev/null
+++ b/src/pcre2_util.h
@@ -0,0 +1,132 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Original API code Copyright (c) 1997-2012 University of Cambridge
+ New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
+#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
+
+/* Assertion macros */
+
+#ifdef PCRE2_DEBUG
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#include <assert.h>
+#endif
+
+/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
+that the code below doesn't support. It is a NOP for non debug builds
+but in debug builds will print information about the location of the
+code where it triggered and crash.
+
+It is meant to work like assert(), and therefore the expression used
+should indicate what the expected state is, and shouldn't have any
+side-effects. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_ASSERT(x) assert(x)
+#else
+#define PCRE2_ASSERT(x) do \
+{ \
+ if (!(x)) \
+ { \
+ fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
+ abort(); \
+ } \
+} while(0)
+#endif
+
+/* PCRE2_UNREACHABLE() can be used to mark locations in the code that
+shouldn't be reached. In non-debug builds it is defined as a hint for
+the compiler to eliminate any code after it, so it is also useful for
+performance reasons, but it should be used with care because if it is
+ever reached it will trigger Undefined Behaviour and, if you are lucky, a
+crash. In debug builds it will report the location where it was triggered
+and crash. One important point to consider when using this macro is
+that it is only implemented for a few compilers, and therefore can't
+be relied on to always be active either, so if it is followed by some
+code it is important to make sure that the whole thing is safe to
+use even if the macro is not there (e.g. make sure there is a `break`
+after it if used at the end of a `case`) and to test your code also
+with a configuration where the macro will be a NOP. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_UNREACHABLE() \
+assert(((void)"Execution reached unexpected point", 0))
+#else
+#define PCRE2_UNREACHABLE() do \
+{ \
+fprintf(stderr, "Execution reached unexpected point at " __FILE__ \
+ ":%d\n", __LINE__); \
+abort(); \
+} while(0)
+#endif
+
+/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous
+macro. It is meant to be used in places where the code is handling
+an error situation in code that shouldn't be reached, but that has
+some sort of fallback code to normally handle the error. When in
+doubt you should use this instead of the previous macro. Like in
+the previous case, it is a good idea to document as much as possible
+the reason and the actions that should be taken if it ever triggers. */
+
+#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
+
+#endif /* PCRE2_DEBUG */
+
+#ifndef PCRE2_DEBUG_UNREACHABLE
+#define PCRE2_DEBUG_UNREACHABLE() do {} while(0)
+#endif
+
+#ifndef PCRE2_UNREACHABLE
+#ifdef HAVE_BUILTIN_UNREACHABLE
+#define PCRE2_UNREACHABLE() __builtin_unreachable()
+#elif defined(HAVE_BUILTIN_ASSUME)
+#define PCRE2_UNREACHABLE() __assume(0)
+#else
+#define PCRE2_UNREACHABLE() do {} while(0)
+#endif
+#endif /* !PCRE2_UNREACHABLE */
+
+#ifndef PCRE2_ASSERT
+#define PCRE2_ASSERT(x) do {} while(0)
+#endif
+
+#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2_util.h */
diff --git a/testdata/testoutput15 b/testdata/testoutput15
index aa9c5c930..f36faeeaf 100644
--- a/testdata/testoutput15
+++ b/testdata/testoutput15
@@ -111,10 +111,10 @@ Minimum depth limit = 10
3: ee
/(*LIMIT_MATCH=12bc)abc/
-Failed: error 160 at offset 17: (*VERB) not recognized or malformed
+Failed: error 160 at offset 16: (*VERB) not recognized or malformed
/(*LIMIT_MATCH=4294967290)abc/
-Failed: error 160 at offset 24: (*VERB) not recognized or malformed
+Failed: error 160 at offset 23: (*VERB) not recognized or malformed
/(*LIMIT_DEPTH=4294967280)abc/I
Capture group count = 0

View File

@ -0,0 +1,68 @@
From d704ee40c5324e5ff6c08f009a7aaa3b67b71565 Mon Sep 17 00:00:00 2001
From: Nicholas Wilson <niwilson@microsoft.com>
Date: Fri, 27 Sep 2024 16:31:01 +0100
Subject: [PATCH] Improve error message for \N{name} in character classes
(#502)
---
src/pcre2_compile.c | 8 ++++++++
testdata/testinput2 | 6 ++++++
testdata/testoutput2 | 9 +++++++++
3 files changed, 23 insertions(+)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index ec4940e63..fd554f1d2 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1542,6 +1542,14 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
#endif
}
+ /* Give an error in contexts where quantifiers are not allowed
+ (character classes; substitution strings). */
+
+ else if (isclassorsub || cb == NULL)
+ {
+ *errorcodeptr = ERR37;
+ }
+
/* Give an error if what follows is not a quantifier, but don't override
an error set by the quantifier reader (e.g. number overflow). */
diff --git a/testdata/testinput2 b/testdata/testinput2
index c6ee980..a33d987 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -913,6 +913,12 @@
/\U/I
+/[\N]/
+
+/[\N{4}]/
+
+/[\N{name}]/
+
/a{1,3}b/ungreedy
ab
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2f2b3d1..4c07b72 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -3245,6 +3245,15 @@ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U,
/\U/I
Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+/[\N]/
+Failed: error 171 at offset 3: \N is not supported in a class
+
+/[\N{4}]/
+Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+
+/[\N{name}]/
+Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+
/a{1,3}b/ungreedy
ab
0: ab

View File

@ -0,0 +1,79 @@
From 9783ca9bed0cfb682e7bc76ed605aeb38571930a Mon Sep 17 00:00:00 2001
From: Addison Crump <addison.crump@cispa.de>
Date: Sat, 18 Nov 2023 16:52:00 +0100
Subject: [PATCH] Sanity checks for ctype functions (#342)
* fixup: sanity checks for ctype functions
* format
* more grep fixes
* don't check if constrained by type
---
src/pcre2_convert.c | 8 ++++++++
src/pcre2grep.c | 8 ++++----
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c
index d45b6fe..cabbd12 100644
--- a/src/pcre2_convert.c
+++ b/src/pcre2_convert.c
@@ -541,6 +541,14 @@ Returns: !0 => character is found in the class
static BOOL
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
{
+#if PCRE2_CODE_UNIT_WIDTH != 8
+if (c > 0xff)
+ {
+ /* ctype functions are not sane for c > 0xff */
+ return 0;
+ }
+#endif
+
switch (class_index)
{
case 1: return isalnum(c);
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index 10314a5..73ef246 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -738,7 +738,7 @@ decode_ANSI_colour(const char *cs)
WORD result = csbi.wAttributes;
while (*cs)
{
- if (isdigit(*cs))
+ if (isdigit((unsigned char)(*cs)))
{
int code = atoi(cs);
if (code == 1) result |= 0x08;
@@ -752,7 +752,7 @@ while (*cs)
else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
- while (isdigit(*cs)) cs++;
+ while (isdigit((unsigned char)(*cs))) cs++;
}
if (*cs) cs++;
}
@@ -3921,7 +3921,7 @@ for (i = 1; i < argc; i++)
if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
{
- if (isdigit((unsigned char)s[1])) break;
+ if (isdigit((unsigned char)(s[1]))) break;
}
else /* Check for an option with data */
{
@@ -4397,7 +4397,7 @@ for (fn = file_lists; fn != NULL; fn = fn->next)
{
int frc;
char *end = buffer + (int)strlen(buffer);
- while (end > buffer && isspace(end[-1])) end--;
+ while (end > buffer && isspace((unsigned char)(end[-1]))) end--;
*end = 0;
if (*buffer != 0)
{
--
2.33.0

View File

@ -0,0 +1,24 @@
From d231944236c6516de2831cbdde3069dab180ae81 Mon Sep 17 00:00:00 2001
From: pkuzco <b.naamneh@gmail.com>
Date: Mon, 9 Oct 2023 17:46:42 +0200
Subject: [PATCH] fix a possible integer overflow in DFA matching (#305)
---
src/pcre2_dfa_match.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 518ac07..13b1ae4 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -428,7 +428,7 @@ overflow. */
else
{
- uint32_t newsize = (rws->size >= UINT32_MAX/2)? UINT32_MAX/2 : rws->size * 2;
+ uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2;
uint32_t newsizeK = newsize/(1024/sizeof(int));
if (newsizeK + mb->heap_used > mb->heap_limit)
--
2.33.0

View File

@ -0,0 +1,118 @@
From bc367f1880ae5ccc771d5780e35df4c42744a9c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Sun, 22 Sep 2024 01:49:03 -0700
Subject: [PATCH] pcre2_compile: avoid 1 byte buffer overread parsing VERBs
(#487)
As reported recently by ef218fb (Guard against out-of-bounds memory
access when parsing LIMIT_HEAP et al (#463), 2024-09-07), a malformed
pattern could result in reading 1 byte past its end.
Fix a similar issue that affects all VERBs and add test cases to
ensure the original bug and all its siblings are no longer an issue.
While at it fix the wording of the related documentation.
---
doc/pcre2syntax.3 | 4 ++--
src/pcre2_compile.c | 11 +++--------
testdata/testinput2 | 8 ++++++++
testdata/testoutput2 | 12 ++++++++++++
4 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3
index 232125b82..db0bb6586 100644
--- a/doc/pcre2syntax.3
+++ b/doc/pcre2syntax.3
@@ -408,8 +408,8 @@ only one hyphen. Setting (but no unsetting) is allowed after (?^ for example
example (?i:...).
.P
The following are recognized only at the very start of a pattern or after one
-of the newline or \eR options with similar syntax. More than one of them may
-appear. For the first three, d is a decimal number.
+of the newline or \eR sequences or options with similar syntax. More than one
+of them may appear. For the first three, d is a decimal number.
.sp
(*LIMIT_DEPTH=d) set the backtracking limit to d
(*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 7e48b26..3d9a500 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -9877,13 +9877,14 @@ if ((options & PCRE2_LITERAL) == 0)
{
for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++)
{
- uint32_t c, pp;
pso *p = pso_list + i;
if (patlen - skipatstart - 2 >= p->length &&
PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name),
p->length) == 0)
{
+ uint32_t c, pp;
+
skipatstart += p->length + 2;
switch(p->type)
{
@@ -9910,18 +9911,12 @@ if ((options & PCRE2_LITERAL) == 0)
case PSO_LIMH:
c = 0;
pp = skipatstart;
- if (!IS_DIGIT(ptr[pp]))
- {
- errorcode = ERR60;
- ptr += pp;
- goto HAD_EARLY_ERROR;
- }
while (pp < patlen && IS_DIGIT(ptr[pp]))
{
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
c = c*10 + (ptr[pp++] - CHAR_0);
}
- if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
+ if (pp >= patlen || pp == skipatstart || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR60;
ptr += pp;
diff --git a/testdata/testinput2 b/testdata/testinput2
index a869c5bc2..542d14520 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5261,6 +5261,14 @@ a)"xI
/(*LIMIT_HEAP=0)xxx/I
+/(*LIMIT_HEAP=123/use_length
+
+/(*LIMIT_MATCH=/use_length
+
+/(*CRLF)(*LIMIT_DEPTH=/use_length
+
+/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length
+
/\d{0,3}(*:abc)(?C1)xxx/callout_info
# ----------------------------------------------------------------------
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index bf7b7620e..b99d64781 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16220,6 +16220,18 @@ First code unit = 'x'
Last code unit = 'x'
Subject length lower bound = 3
+/(*LIMIT_HEAP=123/use_length
+Failed: error 160 at offset 16: (*VERB) not recognized or malformed
+
+/(*LIMIT_MATCH=/use_length
+Failed: error 160 at offset 14: (*VERB) not recognized or malformed
+
+/(*CRLF)(*LIMIT_DEPTH=/use_length
+Failed: error 160 at offset 21: (*VERB) not recognized or malformed
+
+/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length
+Failed: error 160 at offset 34: (*VERB) not recognized or malformed
+
/\d{0,3}(*:abc)(?C1)xxx/callout_info
Callout 1 x

View File

@ -1,6 +1,6 @@
Name: pcre2 Name: pcre2
Version: 10.35 Version: 10.35
Release: 5 Release: 10
Summary: Perl Compatible Regular Expressions Summary: Perl Compatible Regular Expressions
License: BSD License: BSD
URL: http://www.pcre.org/ URL: http://www.pcre.org/
@ -36,6 +36,28 @@ Patch6019: backport-Fixed-an-issue-in-the-backtracking-optimization-of-c.pat
Patch6020: backport-jit-fail-early-in-ffcps_-if-subject-shorter-than-off.patch Patch6020: backport-jit-fail-early-in-ffcps_-if-subject-shorter-than-off.patch
Patch6021: backport-jit-fix-pcre2_jit_free_unused_memory-if-sljit-not-us.patch Patch6021: backport-jit-fix-pcre2_jit_free_unused_memory-if-sljit-not-us.patch
Patch6022: backport-fix-CVE-2022-41409.patch Patch6022: backport-fix-CVE-2022-41409.patch
Patch6023: backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch
Patch6024: backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch
Patch6025: backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch
Patch6026: backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
Patch6027: backport-Fix-incorrect-patch-in-c1306126.patch
Patch6028: backport-Fix-another-oversight-in-c1306126.patch
Patch6029: backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch
Patch6030: backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch
Patch6031: backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch
Patch6032: backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch
Patch6033: backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch
Patch6034: backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
Patch6035: backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch
Patch6036: backport-Sanity-checks-for-ctype-functions-342.patch
Patch6037: backport-Fix-incorrect-class-character-matches-in-JIT.patch
Patch6038: backport-Fixing-an-issue-using-empty-character-sets-in-jit.patch
Patch6039: backport-Fix-non-recognition-of-some-octal-escapes-in-substitute.patch
Patch6040: backport-Guard-against-out-of-bounds-memory-access-when-parsing.patch
Patch6041: backport-Add-Perl-titlecasing-475.patch
Patch6042: backport-Fix-incorrect-positive-error-code-from-pcre2_substitute.patch
Patch6043: backport-pcre2_compile-avoid-1-byte-buffer-overread-parsing-V.patch
Patch6044: backport-Improve-error-message-for-N-name-in-character-classes.patch
BuildRequires: autoconf libtool automake coreutils gcc make readline-devel BuildRequires: autoconf libtool automake coreutils gcc make readline-devel
Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools
@ -152,6 +174,22 @@ make check
%{_pkgdocdir}/html/ %{_pkgdocdir}/html/
%changelog %changelog
* Wed Dec 4 2024 hugel <gengqihu2@h-partners.com> - 10.35-10
- DESC:sync patches from upstream
* Tue May 07 2024 xujing <xujing125@huawei.com> - 10.35-9
- DESC:Fixing an issue using empty character sets in jit
* Tue Feb 06 2024 xujing <xujing125@huawei.com> - 10.35-8
- DESC:Sanity checks for ctype functions
Fix incorrect class character matches in JIT
* Mon Jan 22 2024 xujing <xujing125@huawei.com> - 10.35-7
- DESC:sync patches from upstream to fix some bugs
* Thu Dec 14 2023 xujing <xujing125@huawei.com> - 10.35-6
- DESC:fix a possible integer overflow in DFA matching (#305)
* Mon Jul 31 2023 yangmingtai <yangmingtai@huawei.com> - 10.35-5 * Mon Jul 31 2023 yangmingtai <yangmingtai@huawei.com> - 10.35-5
- DESC:fix CVE-2022-41409 - DESC:fix CVE-2022-41409