Skip to content

Commit eafed1e

Browse files
Fix lookaround union order
1 parent 183da7a commit eafed1e

File tree

1 file changed

+49 -1 lines changed

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 49 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -996,7 +996,7 @@ impl Compiler {
996 996
if has_lookarounds {
997 997
let lookaround_prefix =
998 998
self.c_at_least(&Hir::dot(hir::Dot::AnyByte), false, 0)?;
999-
let lookaround_alt = self.add_union_reverse()?;
999+
let lookaround_alt = self.add_union()?;
1000 1000
self.patch(lookaround_prefix.end, lookaround_alt)?;
1001 1001
self.patch(top_level_alt, lookaround_prefix.start)?;
1002 1002
self.lookaround_alt.borrow_mut().replace(lookaround_alt);
@@ -2037,6 +2037,22 @@ mod tests {
2037 2037
}
2038 2038
}
2039 2039

2040+
fn s_write_lookaround(id: usize) -> State {
2041+
State::WriteLookaround {
2042+
look_idx: SmallIndex::new(id)
2043+
.expect("look-around index too large"),
2044+
}
2045+
}
2046+
2047+
fn s_check_lookaround(id: usize, positive: bool, next: usize) -> State {
2048+
State::CheckLookaround {
2049+
look_idx: SmallIndex::new(id)
2050+
.expect("look-around index too large"),
2051+
positive,
2052+
next: sid(next),
2053+
}
2054+
}
2055+
2040 2056
fn s_fail() -> State {
2041 2057
State::Fail
2042 2058
}
@@ -2262,6 +2278,38 @@ mod tests {
2262 2278
);
2263 2279
}
2264 2280

2281+
#[test]
2282+
fn compile_lookbehind() {
2283+
assert_eq!(
2284+
build(r"(?<=a)").states(),
2285+
&[
2286+
s_bin_union(1, 4),
2287+
s_bin_union(3, 2),
2288+
s_range(b'\x00', b'\xFF', 1),
2289+
s_byte(b'a', 5),
2290+
s_check_lookaround(0, true, 6),
2291+
s_write_lookaround(0),
2292+
s_match(0)
2293+
]
2294+
);
2295+
assert_eq!(
2296+
build(r"(?<=a(?<!b))").states(),
2297+
&[
2298+
s_bin_union(1, 8),
2299+
s_bin_union(3, 2),
2300+
s_range(b'\x00', b'\xFF', 1),
2301+
s_bin_union(5, 4),
2302+
s_byte(b'a', 6),
2303+
s_byte(b'b', 7),
2304+
s_check_lookaround(0, false, 9),
2305+
s_write_lookaround(0),
2306+
s_check_lookaround(1, true, 10),
2307+
s_write_lookaround(1),
2308+
s_match(0)
2309+
]
2310+
);
2311+
}
2312+
2265 2313
// This tests the use of a non-binary union, i.e., a state with more than
2266 2314
// 2 unconditional epsilon transitions. The only place they tend to appear
2267 2315
// is in reverse NFAs when shrinking is disabled. Otherwise, 'binary-union'

0 commit comments

Comments
 (0)