Reference:https://go-review.googlesource.com/c/go/+/384855/ Conflict:NA Score:CVE-2022-24921:7.5 Reason:fix CVE-2022-24921
198 lines
6.0 KiB
Diff
198 lines
6.0 KiB
Diff
From c8544ba60a2a4e5973aae3ae96cfa723deff3048 Mon Sep 17 00:00:00 2001
|
|
From: Russ Cox <rsc@golang.org>
|
|
Date: Wed, 2 Feb 2022 16:41:32 -0500
|
|
Subject: [Backport] [release-branch.go1.16] regexp/syntax: reject very deeply
|
|
nested regexps in Parse
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
The regexp code assumes it can recurse over the structure of
|
|
a regexp safely. Go's growable stacks make that reasonable
|
|
for all plausible regexps, but implausible ones can reach the
|
|
“infinite recursion?” stack limit.
|
|
|
|
This CL limits the depth of any parsed regexp to 1000.
|
|
That is, the depth of the parse tree is required to be ≤ 1000.
|
|
Regexps that require deeper parse trees will return ErrInternalError.
|
|
A future CL will change the error to ErrInvalidDepth,
|
|
but using ErrInternalError for now avoids introducing new API
|
|
in point releases when this is backported.
|
|
|
|
Fixes #51112.
|
|
Fixes #51117.
|
|
|
|
Change-Id: I97d2cd82195946eb43a4ea8561f5b95f91fb14c5
|
|
Reviewed-on: https://go-review.googlesource.com/c/go/+/384616
|
|
Trust: Russ Cox <rsc@golang.org>
|
|
Run-TryBot: Russ Cox <rsc@golang.org>
|
|
Reviewed-by: Ian Lance Taylor <iant@golang.org>
|
|
Reviewed-on: https://go-review.googlesource.com/c/go/+/384855
|
|
---
|
|
src/regexp/syntax/parse.go | 72 ++++++++++++++++++++++++++++++++-
|
|
src/regexp/syntax/parse_test.go | 7 ++++
|
|
2 files changed, 77 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go
|
|
index 7b4030935a..d7cf2afa5e 100644
|
|
--- a/src/regexp/syntax/parse.go
|
|
+++ b/src/regexp/syntax/parse.go
|
|
@@ -76,13 +76,29 @@ const (
|
|
opVerticalBar
|
|
)
|
|
|
|
+// maxHeight is the maximum height of a regexp parse tree.
|
|
+// It is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
+// that no one will actually hit it in real use but at the same time small enough
|
|
+// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
|
|
+// The maximum amount of stack for a single recursive frame is probably
|
|
+// closer to 1kB, so this could potentially be raised, but it seems unlikely
|
|
+// that people have regexps nested even this deeply.
|
|
+// We ran a test on Google's C++ code base and turned up only
|
|
+// a single use case with depth > 100; it had depth 128.
|
|
+// Using depth 1000 should be plenty of margin.
|
|
+// As an optimization, we don't even bother calculating heights
|
|
+// until we've allocated at least maxHeight Regexp structures.
|
|
+const maxHeight = 1000
|
|
+
|
|
type parser struct {
|
|
flags Flags // parse mode flags
|
|
stack []*Regexp // stack of parsed expressions
|
|
free *Regexp
|
|
numCap int // number of capturing groups seen
|
|
wholeRegexp string
|
|
- tmpClass []rune // temporary char class work space
|
|
+ tmpClass []rune // temporary char class work space
|
|
+ numRegexp int // number of regexps allocated
|
|
+ height map[*Regexp]int // regexp height for height limit check
|
|
}
|
|
|
|
func (p *parser) newRegexp(op Op) *Regexp {
|
|
@@ -92,16 +108,52 @@ func (p *parser) newRegexp(op Op) *Regexp {
|
|
*re = Regexp{}
|
|
} else {
|
|
re = new(Regexp)
|
|
+ p.numRegexp++
|
|
}
|
|
re.Op = op
|
|
return re
|
|
}
|
|
|
|
func (p *parser) reuse(re *Regexp) {
|
|
+ if p.height != nil {
|
|
+ delete(p.height, re)
|
|
+ }
|
|
re.Sub0[0] = p.free
|
|
p.free = re
|
|
}
|
|
|
|
+func (p *parser) checkHeight(re *Regexp) {
|
|
+ if p.numRegexp < maxHeight {
|
|
+ return
|
|
+ }
|
|
+ if p.height == nil {
|
|
+ p.height = make(map[*Regexp]int)
|
|
+ for _, re := range p.stack {
|
|
+ p.checkHeight(re)
|
|
+ }
|
|
+ }
|
|
+ if p.calcHeight(re, true) > maxHeight {
|
|
+ panic(ErrInternalError)
|
|
+ }
|
|
+}
|
|
+
|
|
+func (p *parser) calcHeight(re *Regexp, force bool) int {
|
|
+ if !force {
|
|
+ if h, ok := p.height[re]; ok {
|
|
+ return h
|
|
+ }
|
|
+ }
|
|
+ h := 1
|
|
+ for _, sub := range re.Sub {
|
|
+ hsub := p.calcHeight(sub, false)
|
|
+ if h < 1+hsub {
|
|
+ h = 1 + hsub
|
|
+ }
|
|
+ }
|
|
+ p.height[re] = h
|
|
+ return h
|
|
+}
|
|
+
|
|
// Parse stack manipulation.
|
|
|
|
// push pushes the regexp re onto the parse stack and returns the regexp.
|
|
@@ -137,6 +189,7 @@ func (p *parser) push(re *Regexp) *Regexp {
|
|
}
|
|
|
|
p.stack = append(p.stack, re)
|
|
+ p.checkHeight(re)
|
|
return re
|
|
}
|
|
|
|
@@ -246,6 +299,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
|
|
re.Sub = re.Sub0[:1]
|
|
re.Sub[0] = sub
|
|
p.stack[n-1] = re
|
|
+ p.checkHeight(re)
|
|
|
|
if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
|
|
return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
|
|
@@ -693,6 +747,21 @@ func literalRegexp(s string, flags Flags) *Regexp {
|
|
// Flags, and returns a regular expression parse tree. The syntax is
|
|
// described in the top-level comment.
|
|
func Parse(s string, flags Flags) (*Regexp, error) {
|
|
+ return parse(s, flags)
|
|
+}
|
|
+
|
|
+func parse(s string, flags Flags) (_ *Regexp, err error) {
|
|
+ defer func() {
|
|
+ switch r := recover(); r {
|
|
+ default:
|
|
+ panic(r)
|
|
+ case nil:
|
|
+ // ok
|
|
+ case ErrInternalError:
|
|
+ err = &Error{Code: ErrInternalError, Expr: s}
|
|
+ }
|
|
+ }()
|
|
+
|
|
if flags&Literal != 0 {
|
|
// Trivial parser for literal string.
|
|
if err := checkUTF8(s); err != nil {
|
|
@@ -704,7 +773,6 @@ func Parse(s string, flags Flags) (*Regexp, error) {
|
|
// Otherwise, must do real work.
|
|
var (
|
|
p parser
|
|
- err error
|
|
c rune
|
|
op Op
|
|
lastRepeat string
|
|
diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go
|
|
index 5581ba1ca5..1ef6d8a3fe 100644
|
|
--- a/src/regexp/syntax/parse_test.go
|
|
+++ b/src/regexp/syntax/parse_test.go
|
|
@@ -207,6 +207,11 @@ var parseTests = []parseTest{
|
|
// Valid repetitions.
|
|
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
|
|
{`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
|
|
+
|
|
+ // Valid nesting.
|
|
+ {strings.Repeat("(", 999) + strings.Repeat(")", 999), ``},
|
|
+ {strings.Repeat("(?:", 999) + strings.Repeat(")*", 999), ``},
|
|
+ {"(" + strings.Repeat("|", 12345) + ")", ``}, // not nested at all
|
|
}
|
|
|
|
const testFlags = MatchNL | PerlX | UnicodeGroups
|
|
@@ -482,6 +487,8 @@ var invalidRegexps = []string{
|
|
`a{100000}`,
|
|
`a{100000,}`,
|
|
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
|
|
+ strings.Repeat("(", 1000) + strings.Repeat(")", 1000),
|
|
+ strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000),
|
|
`\Q\E*`,
|
|
}
|
|
|
|
--
|
|
2.30.0
|
|
|