Skip to content

Commit 466e42c

Browse files
committed
lite: fix stack overflow in NFA compiler
This commit fixes a bug where the parser could produce a very deeply nested Hir value beyond the configured nest limit. The cause is that some nested structures, such as repetition operators, can be added to the Hir without a corresponding recursive call in the parser. This means that even if we don't blow the nest limit in the parser, the Hir itself can still become nested beyond the limit. This in turn makes it possible to unintentionally overflow the stack in subsequent recursion over the Hir value, such as in the Thompson NFA compiler. We fix this by checking the nesting limit both on every recursive parse call and on the depth of the final Hir value once parsing is finished, but before it is returned to the caller. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608
1 parent 5dff4bd commit 466e42c

File tree

3 files changed

+72
-5
lines changed

3 files changed

+72
-5
lines changed
Binary file not shown.

regex-lite/src/hir/parse.rs

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,24 @@ impl<'a> Parser<'a> {
377377
/// own routine.
378378
impl<'a> Parser<'a> {
379379
/// Parses the pattern into an `Hir`, then rejects the result if the finished
/// value is nested more deeply than the configured nest limit.
///
/// # Errors
///
/// Returns any error produced by `parse_inner`, or the error produced by
/// `check_hir_nesting` when the resulting `Hir` is too deep.
pub(super) fn parse(&self) -> Result<Hir, Error> {
    let parsed = self.parse_inner()?;
    // The depth counter used while parsing only tracks recursive parse
    // calls. Some Hir nesting is created without any such call (repetition
    // operators, for instance, can be stacked arbitrarily), so the parser
    // alone cannot guarantee a shallow Hir. A later recursive walk over an
    // overly deep Hir could then overflow the stack, which is why the
    // completed value is inspected here as well.
    //
    // The in-parser check is still required: it is what protects the stack
    // during parsing, before a complete Hir exists to be inspected.
    check_hir_nesting(&parsed, self.config.nest_limit).map(|()| parsed)
}
396+
397+
fn parse_inner(&self) -> Result<Hir, Error> {
380398
let depth = self.increment_depth()?;
381399
let mut alternates = vec![];
382400
let mut concat = vec![];
@@ -806,7 +824,7 @@ impl<'a> Parser<'a> {
806824
if self.bump_if("?P<") || self.bump_if("?<") {
807825
let index = self.next_capture_index()?;
808826
let name = Some(Box::from(self.parse_capture_name()?));
809-
let sub = Box::new(self.parse()?);
827+
let sub = Box::new(self.parse_inner()?);
810828
let cap = hir::Capture { index, name, sub };
811829
Ok(Some(Hir::capture(cap)))
812830
} else if self.bump_if("?") {
@@ -826,11 +844,11 @@ impl<'a> Parser<'a> {
826844
} else {
827845
assert_eq!(':', self.char());
828846
self.bump();
829-
self.parse().map(Some)
847+
self.parse_inner().map(Some)
830848
}
831849
} else {
832850
let index = self.next_capture_index()?;
833-
let sub = Box::new(self.parse()?);
851+
let sub = Box::new(self.parse_inner()?);
834852
let cap = hir::Capture { index, name: None, sub };
835853
Ok(Some(Hir::capture(cap)))
836854
}
@@ -1263,6 +1281,38 @@ impl<'a> Parser<'a> {
12631281
}
12641282
}
12651283

1284+
/// This checks the depth of the given `Hir` value, and if it exceeds the given
1285+
/// limit, then an error is returned.
1286+
fn check_hir_nesting(hir: &Hir, limit: u32) -> Result<(), Error> {
1287+
fn recurse(hir: &Hir, limit: u32, depth: u32) -> Result<(), Error> {
1288+
if depth > limit {
1289+
return Err(Error::new(ERR_TOO_MUCH_NESTING));
1290+
}
1291+
let Some(next_depth) = depth.checked_add(1) else {
1292+
return Err(Error::new(ERR_TOO_MUCH_NESTING));
1293+
};
1294+
match *hir.kind() {
1295+
HirKind::Empty
1296+
| HirKind::Char(_)
1297+
| HirKind::Class(_)
1298+
| HirKind::Look(_) => Ok(()),
1299+
HirKind::Repetition(hir::Repetition { ref sub, .. }) => {
1300+
recurse(sub, limit, next_depth)
1301+
}
1302+
HirKind::Capture(hir::Capture { ref sub, .. }) => {
1303+
recurse(sub, limit, next_depth)
1304+
}
1305+
HirKind::Concat(ref subs) | HirKind::Alternation(ref subs) => {
1306+
for sub in subs.iter() {
1307+
recurse(sub, limit, next_depth)?;
1308+
}
1309+
Ok(())
1310+
}
1311+
}
1312+
}
1313+
recurse(hir, limit, 0)
1314+
}
1315+
12661316
/// Converts the given Hir to a literal char if the Hir is just a single
12671317
/// character. Otherwise this returns an error.
12681318
///
@@ -1344,12 +1394,12 @@ mod tests {
13441394
use super::*;
13451395

13461396
/// Test helper: parses `pattern` with `Config::default()` and unwraps the
/// result, so a parse error panics the calling test.
///
/// This hunk switches the call from `parse()` to `parse_inner()`. `parse()`
/// is the entry point that additionally runs `check_hir_nesting` on the
/// finished `Hir`; `parse_inner()` does not go through that wrapper.
fn p(pattern: &str) -> Hir {
1347-
Parser::new(Config::default(), pattern).parse().unwrap()
1397+
Parser::new(Config::default(), pattern).parse_inner().unwrap()
13481398
}
13491399

13501400
/// Test helper: parses `pattern` with `Config::default()`, expects the parse
/// to fail, and returns the error rendered via `to_string()`. `unwrap_err`
/// panics the calling test if parsing unexpectedly succeeds.
///
/// This hunk switches the call from `parse()` to `parse_inner()`, mirroring
/// the same change made to the `p` helper.
fn perr(pattern: &str) -> String {
13511401
Parser::new(Config::default(), pattern)
1352-
.parse()
1402+
.parse_inner()
13531403
.unwrap_err()
13541404
.to_string()
13551405
}

regex-lite/tests/fuzz/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,23 @@ fn captures_wrong_order_min() {
1414
let _ = run(data);
1515
}
1616

17+
// Reduced regression test for a failure reported by OSS-fuzz[1]. Before the
// fix, this input caused a stack overflow. The fix adds a second nest check
// that runs on the Hir value itself, because the Hir can gain depth without
// any recursive call in the parser (which is where the original nest check
// lived).
//
// Many thanks to Addison Crump for coming up with this test case[2].
//
// [1]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608
// [2]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608#c1
#[test]
fn many_zero_to_many_reps() {
    // A single `.` followed by 2^15 stacked `*` operators.
    let stars = "*".repeat(1 << 15);
    let pattern = format!(".{stars}");
    // A build error is an acceptable outcome; the test only has to finish
    // without overflowing the stack.
    if let Ok(re) = regex_lite::RegexBuilder::new(&pattern).build() {
        re.is_match("");
    }
}
33+
1734
// This is the fuzz target function. We duplicate it here since this is the
1835
// thing we use to interpret the data. It is ultimately what we want to
1936
// succeed.

0 commit comments

Comments
 (0)