perl/backport-CVE-2020-12723.patch
zhanliwen 0a5b95149c cve
2020-08-03 15:43:54 +08:00

278 lines
12 KiB
Diff

From c0b79e3c354b25f9fb732bd49f668ae5eeaa723a Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden <hv@crypt.org>
Date: Sat, 11 Apr 2020 14:10:24 +0100
Subject: [PATCH] study_chunk: avoid mutating regexp program within GOSUB
gh16947 and gh17743: studying GOSUB may restudy in an inner call
(via a mix of recursion and enframing) something that an outer call
is in the middle of looking at. Let the outer frame deal with it.
(CVE-2020-12723)
(cherry picked from commit c4033e740bd18d9fbe3456a9db2ec2053cdc5271)
port from:
https://github.com/perl/perl5/commit/66bbb51b93253a3f87d11c2695cfb7bdb782184a
Signed-off-by: Peibao Liu <peibao.liu@windriver.com>
---
embed.fnc | 2 +-
embed.h | 2 +-
proto.h | 2 +-
regcomp.c | 42 ++++++++++++++++++++++++++++++------------
t/re/pat.t | 27 ++++++++++++++++++++++++++-
5 files changed, 59 insertions(+), 16 deletions(-)
diff -Naur a/embed.fnc b/embed.fnc
--- a/embed.fnc 2020-07-14 15:18:38.077000000 -0400
+++ b/embed.fnc 2020-07-14 15:21:38.116000000 -0400
@@ -2475,7 +2475,7 @@
|NULLOK struct scan_data_t *data \
|I32 stopparen|U32 recursed_depth \
|NULLOK regnode_ssc *and_withp \
- |U32 flags|U32 depth
+ |U32 flags|U32 depth|bool was_mutate_ok
Es |void |rck_elide_nothing|NN regnode *node
EsR |SV * |get_ANYOFM_contents|NN const regnode * n
EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \
diff -Naur a/embed.h b/embed.h
--- a/embed.h 2020-07-14 15:18:38.095000000 -0400
+++ b/embed.h 2020-07-14 15:21:58.416000000 -0400
@@ -1232,7 +1232,7 @@
#define ssc_is_cp_posixl_init S_ssc_is_cp_posixl_init
#define ssc_or(a,b,c) S_ssc_or(aTHX_ a,b,c)
#define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c)
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C)
#define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d)
diff -Naur a/proto.h b/proto.h
--- a/proto.h 2020-07-14 15:18:38.074000000 -0400
+++ b/proto.h 2020-07-14 15:22:41.447000000 -0400
@@ -5593,7 +5593,7 @@
#define PERL_ARGS_ASSERT_SSC_UNION \
assert(ssc); assert(invlist)
#endif
-STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth);
+STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
#define PERL_ARGS_ASSERT_STUDY_CHUNK \
assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
#endif
diff -Naur a/regcomp.c b/regcomp.c
--- a/regcomp.c 2020-07-14 15:18:38.091000000 -0400
+++ b/regcomp.c 2020-07-14 15:34:01.739000000 -0400
@@ -110,6 +110,7 @@
regnode *next_regnode; /* next node to process when last is reached */
U32 prev_recursed_depth;
I32 stopparen; /* what stopparen do we use */
+ bool in_gosub; /* this or an outer frame is for GOSUB */
struct scan_frame *this_prev_frame; /* this previous frame */
struct scan_frame *prev_frame; /* previous frame */
@@ -4225,7 +4226,7 @@
I32 stopparen,
U32 recursed_depth,
regnode_ssc *and_withp,
- U32 flags, U32 depth)
+ U32 flags, U32 depth, bool was_mutate_ok)
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
/* last: Stop before this one. */
@@ -4303,6 +4304,10 @@
node length to get a real minimum (because
the folded version may be shorter) */
bool unfolded_multi_char = FALSE;
+ /* avoid mutating ops if we are anywhere within the recursed or
+ * enframed handling for a GOSUB: the outermost level will handle it.
+ */
+ bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
/* Peephole optimizer: */
DEBUG_STUDYDATA("Peep", data, depth, is_inf);
DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4313,7 +4318,8 @@
* parsing code, as each (?:..) is handled by a different invocation of
* reg() -- Yves
*/
- JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
+ if (mutate_ok)
+ JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
/* Follow the next-chain of the current node and optimize
away all the NOTHINGs from it.
@@ -4345,7 +4351,7 @@
/* DEFINEP study_chunk() recursion */
(void)study_chunk(pRExC_state, &scan, &minlen,
&deltanext, next, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1, mutate_ok);
scan = next;
} else
@@ -4413,7 +4419,8 @@
/* recurse study_chunk() for each BRANCH in an alternation */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, next, &data_fake, stopparen,
- recursed_depth, NULL, f,depth+1);
+ recursed_depth, NULL, f, depth+1,
+ mutate_ok);
if (min1 > minnext)
min1 = minnext;
@@ -4480,9 +4487,10 @@
}
}
- if (PERL_ENABLE_TRIE_OPTIMISATION &&
- OP( startbranch ) == BRANCH )
- {
+ if (PERL_ENABLE_TRIE_OPTIMISATION
+ && OP(startbranch) == BRANCH
+ && mutate_ok
+ ) {
/* demq.
Assuming this was/is a branch we are dealing with: 'scan'
@@ -4933,6 +4941,9 @@
newframe->stopparen = stopparen;
newframe->prev_recursed_depth = recursed_depth;
newframe->this_prev_frame= frame;
+ newframe->in_gosub = (
+ (frame && frame->in_gosub) || OP(scan) == GOSUB
+ );
DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
DEBUG_PEEP("fnew", scan, depth, flags);
@@ -5153,7 +5164,7 @@
(mincount == 0
? (f & ~SCF_DO_SUBSTR)
: f)
- ,depth+1);
+ , depth+1, mutate_ok);
if (flags & SCF_DO_STCLASS)
data->start_class = oclass;
@@ -5221,7 +5232,9 @@
if ( OP(oscan) == CURLYX && data
&& data->flags & SF_IN_PAR
&& !(data->flags & SF_HAS_EVAL)
- && !deltanext && minnext == 1 ) {
+ && !deltanext && minnext == 1
+ && mutate_ok
+ ) {
/* Try to optimize to CURLYN. */
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
regnode * const nxt1 = nxt;
@@ -5267,10 +5280,10 @@
&& !(data->flags & SF_HAS_EVAL)
&& !deltanext /* atom is fixed width */
&& minnext != 0 /* CURLYM can't handle zero width */
-
/* Nor characters whose fold at run-time may be
* multi-character */
&& ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
+ && mutate_ok
) {
/* XXXX How to optimize if data == 0? */
/* Optimize to a simpler form. */
@@ -5318,7 +5331,8 @@
/* Optimize again: */
/* recurse study_chunk() on optimised CURLYX => CURLYM */
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
- NULL, stopparen, recursed_depth, NULL, 0,depth+1);
+ NULL, stopparen, recursed_depth, NULL, 0,
+ depth+1, mutate_ok);
}
else
oscan->flags = 0;
@@ -5735,7 +5749,8 @@
/* recurse study_chunk() for lookahead body */
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
last, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1,
+ mutate_ok);
if (scan->flags) {
if (deltanext) {
FAIL("Variable length lookbehind not implemented");
@@ -5827,7 +5842,7 @@
*minnextp = study_chunk(pRExC_state, &nscan, minnextp,
&deltanext, last, &data_fake,
stopparen, recursed_depth, NULL,
- f,depth+1);
+ f, depth+1, mutate_ok);
if (scan->flags) {
if (deltanext) {
FAIL("Variable length lookbehind not implemented");
@@ -5988,7 +6003,8 @@
/* optimise study_chunk() for TRIE */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, (regnode *)nextbranch, &data_fake,
- stopparen, recursed_depth, NULL, f,depth+1);
+ stopparen, recursed_depth, NULL, f, depth+1,
+ mutate_ok);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode*)nextbranch);
@@ -7673,7 +7689,7 @@
&data, -1, 0, NULL,
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
| (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
- 0);
+ 0, TRUE);
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -7802,7 +7818,7 @@
SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
? SCF_TRIE_DOING_RESTUDY
: 0),
- 0);
+ 0, TRUE);
CHECK_RESTUDY_GOTO_butfirst(NOOP);
diff -Naur a/t/re/pat.t b/t/re/pat.t
--- a/t/re/pat.t 2020-07-14 15:18:37.898000000 -0400
+++ b/t/re/pat.t 2020-07-14 15:40:01.252000000 -0400
@@ -23,7 +23,7 @@
skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();
-plan tests => 848; # Update this when adding/deleting tests.
+plan tests => 852; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1948,6 +1948,30 @@
fresh_perl_is('m m0*0+\Rm', "",{},"Undefined behavior in address sanitizer");
}
+ # gh16947: test regexp corruption (GOSUB)
+ {
+ fresh_perl_is(q{
+ 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
+ }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
+ }
+ # gh16947: test fix doesn't break SUSPEND
+ {
+ fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
+ 'ok', {}, "gh16947: test fix doesn't break SUSPEND");
+ }
+
+ # gh17743: more regexp corruption via GOSUB
+ {
+ fresh_perl_is(q{
+ "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok"
+ }, 'ok', {}, 'gh17743: test regexp corruption (1)');
+
+ fresh_perl_is(q{
+ "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/;
+ print "ok"
+ }, 'ok', {}, 'gh17743: test regexp corruption (2)');
+ }
+
} # End of sub run_tests
1;