Skip to content

Commit f79fbab

Browse files
shilangyu and Multimodcrafter
authored and committed
Change engine fallbacks
1 parent f6a82b2 commit f79fbab

File tree

9 files changed

+101
-49
lines changed

9 files changed

+101
-49
lines changed

regex-automata/src/dfa/dense.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5084,7 +5084,7 @@ impl BuildError {
50845084
}
50855085

50865086
pub(crate) fn unsupported_lookaround() -> BuildError {
5087-
let msg = "cannot build DFAs for regexes with look-around\
5087+
let msg = "cannot build DFAs for regexes with look-around \
50885088
sub-expressions; use a different regex engine";
50895089
BuildError { kind: BuildErrorKind::Unsupported(msg) }
50905090
}

regex-automata/src/dfa/onepass.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,9 @@ impl<'a> InternalBuilder<'a> {
602602
));
603603
}
604604
assert_eq!(DEAD, self.add_empty_state()?);
605+
if self.nfa.lookaround_count() > 0 {
606+
return Err(BuildError::unsupported_lookaround());
607+
}
605608

606609
// This is where the explicit slots start. We care about this because
607610
// we only need to track explicit slots. The implicit slots---two for
@@ -640,7 +643,7 @@ impl<'a> InternalBuilder<'a> {
640643
match *self.nfa.state(id) {
641644
thompson::State::WriteLookAround { .. }
642645
| thompson::State::CheckLookAround { .. } => {
643-
todo!("check how to handle")
646+
return Err(BuildError::unsupported_lookaround());
644647
}
645648
thompson::State::ByteRange { ref trans } => {
646649
self.compile_transition(dfa_id, trans, epsilons)?;
@@ -3000,6 +3003,7 @@ enum BuildErrorKind {
30003003
UnsupportedLook { look: Look },
30013004
ExceededSizeLimit { limit: usize },
30023005
NotOnePass { msg: &'static str },
3006+
UnsupportedLookAround,
30033007
}
30043008

30053009
impl BuildError {
@@ -3030,6 +3034,10 @@ impl BuildError {
30303034
fn not_one_pass(msg: &'static str) -> BuildError {
30313035
BuildError { kind: BuildErrorKind::NotOnePass { msg } }
30323036
}
3037+
3038+
fn unsupported_lookaround() -> BuildError {
3039+
BuildError { kind: BuildErrorKind::UnsupportedLookAround }
3040+
}
30333041
}
30343042

30353043
#[cfg(feature = "std")]
@@ -3078,6 +3086,9 @@ impl core::fmt::Display for BuildError {
30783086
pattern is not one-pass: {}",
30793087
msg,
30803088
),
3089+
UnsupportedLookAround => {
3090+
write!(f, "one-pass DFA does not support look-arounds")
3091+
}
30813092
}
30823093
}
30833094
}

regex-automata/src/hybrid/error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ impl BuildError {
6363
}
6464

6565
pub(crate) fn unsupported_lookaround() -> BuildError {
66-
let msg = "cannot build DFAs for regexes with look-around\
66+
let msg = "cannot build DFAs for regexes with look-around \
6767
sub-expressions; use a different regex engine";
6868
BuildError { kind: BuildErrorKind::Unsupported(msg) }
6969
}

regex-automata/src/meta/strategy.rs

Lines changed: 44 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -490,49 +490,52 @@ impl Core {
490490
// we know we aren't going to use the lazy DFA. So we do a config check
491491
// up front, which is in practice the only way we won't try to use the
492492
// DFA.
493-
let (nfarev, hybrid, dfa) =
494-
if !info.config().get_hybrid() && !info.config().get_dfa() {
495-
(None, wrappers::Hybrid::none(), wrappers::DFA::none())
493+
let (nfarev, hybrid, dfa) = if !info.config().get_hybrid()
494+
&& !info.config().get_dfa()
495+
// With look-arounds, the lazy DFA and dense DFA would fail to build
496+
|| nfa.lookaround_count() > 0
497+
{
498+
(None, wrappers::Hybrid::none(), wrappers::DFA::none())
499+
} else {
500+
// FIXME: Technically, we don't quite yet KNOW that we need
501+
// a reverse NFA. It's possible for the DFAs below to both
502+
// fail to build just based on the forward NFA. In which case,
503+
// building the reverse NFA was totally wasted work. But...
504+
// fixing this requires breaking DFA construction apart into
505+
// two pieces: one for the forward part and another for the
506+
// reverse part. Quite annoying. Making it worse, when building
507+
// both DFAs fails, it's quite likely that the NFA is large and
508+
// that it will take quite some time to build the reverse NFA
509+
// too. So... it's really probably worth it to do this!
510+
let nfarev = thompson::Compiler::new()
511+
// Currently, reverse NFAs don't support capturing groups,
512+
// so we MUST disable them. But even if we didn't have to,
513+
// we would, because nothing in this crate does anything
514+
// useful with capturing groups in reverse. And of course,
515+
// the lazy DFA ignores capturing groups in all cases.
516+
.configure(
517+
thompson_config
518+
.clone()
519+
.which_captures(WhichCaptures::None)
520+
.reverse(true),
521+
)
522+
.build_many_from_hir(hirs)
523+
.map_err(BuildError::nfa)?;
524+
let dfa = if !info.config().get_dfa() {
525+
wrappers::DFA::none()
496526
} else {
497-
// FIXME: Technically, we don't quite yet KNOW that we need
498-
// a reverse NFA. It's possible for the DFAs below to both
499-
// fail to build just based on the forward NFA. In which case,
500-
// building the reverse NFA was totally wasted work. But...
501-
// fixing this requires breaking DFA construction apart into
502-
// two pieces: one for the forward part and another for the
503-
// reverse part. Quite annoying. Making it worse, when building
504-
// both DFAs fails, it's quite likely that the NFA is large and
505-
// that it will take quite some time to build the reverse NFA
506-
// too. So... it's really probably worth it to do this!
507-
let nfarev = thompson::Compiler::new()
508-
// Currently, reverse NFAs don't support capturing groups,
509-
// so we MUST disable them. But even if we didn't have to,
510-
// we would, because nothing in this crate does anything
511-
// useful with capturing groups in reverse. And of course,
512-
// the lazy DFA ignores capturing groups in all cases.
513-
.configure(
514-
thompson_config
515-
.clone()
516-
.which_captures(WhichCaptures::None)
517-
.reverse(true),
518-
)
519-
.build_many_from_hir(hirs)
520-
.map_err(BuildError::nfa)?;
521-
let dfa = if !info.config().get_dfa() {
522-
wrappers::DFA::none()
523-
} else {
524-
wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
525-
};
526-
let hybrid = if !info.config().get_hybrid() {
527-
wrappers::Hybrid::none()
528-
} else if dfa.is_some() {
529-
debug!("skipping lazy DFA because we have a full DFA");
530-
wrappers::Hybrid::none()
531-
} else {
532-
wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
533-
};
534-
(Some(nfarev), hybrid, dfa)
527+
wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
535528
};
529+
let hybrid = if !info.config().get_hybrid() {
530+
wrappers::Hybrid::none()
531+
} else if dfa.is_some() {
532+
debug!("skipping lazy DFA because we have a full DFA");
533+
wrappers::Hybrid::none()
534+
} else {
535+
wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
536+
};
537+
(Some(nfarev), hybrid, dfa)
538+
};
536539
Ok(Core {
537540
info,
538541
pre,

regex-automata/src/nfa/thompson/backtrack.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,9 @@ impl Builder {
301301
nfa: NFA,
302302
) -> Result<BoundedBacktracker, BuildError> {
303303
nfa.look_set_any().available().map_err(BuildError::word)?;
304+
if nfa.lookaround_count() > 0 {
305+
return Err(BuildError::unsupported_lookarounds());
306+
}
304307
Ok(BoundedBacktracker { config: self.config.clone(), nfa })
305308
}
306309

@@ -1521,7 +1524,9 @@ impl BoundedBacktracker {
15211524
}
15221525
State::WriteLookAround { .. }
15231526
| State::CheckLookAround { .. } => {
1524-
todo!("check how to handle")
1527+
unimplemented!(
1528+
"backtracking engine does not support look-arounds"
1529+
);
15251530
}
15261531
State::Union { ref alternates } => {
15271532
sid = match alternates.get(0) {

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,13 @@ impl Compiler {
954954
{
955955
return Err(BuildError::unsupported_captures());
956956
}
957+
if self.config.get_reverse()
958+
&& exprs.iter().any(|e| {
959+
(e.borrow() as &Hir).properties().contains_lookaround_expr()
960+
})
961+
{
962+
return Err(BuildError::unsupported_lookarounds());
963+
}
957964

958965
self.builder.borrow_mut().clear();
959966
self.builder.borrow_mut().set_utf8(self.config.get_utf8());

regex-automata/src/nfa/thompson/error.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ enum BuildErrorKind {
8181
/// should support it at some point.
8282
#[cfg(feature = "syntax")]
8383
UnsupportedCaptures,
84+
/// An error that occurs when one tries to build a reverse NFA with
85+
/// look-around sub-expressions. Currently, this isn't supported, but we
86+
/// probably should support it at some point.
87+
///
88+
/// This is also emitted by the backtracking engine which does not
89+
/// support look-around sub-expressions.
90+
UnsupportedLookArounds,
8491
}
8592

8693
impl BuildError {
@@ -142,6 +149,10 @@ impl BuildError {
142149
pub(crate) fn unsupported_captures() -> BuildError {
143150
BuildError { kind: BuildErrorKind::UnsupportedCaptures }
144151
}
152+
153+
pub(crate) fn unsupported_lookarounds() -> BuildError {
154+
BuildError { kind: BuildErrorKind::UnsupportedLookArounds }
155+
}
145156
}
146157

147158
#[cfg(feature = "std")]
@@ -201,6 +212,11 @@ impl core::fmt::Display for BuildError {
201212
"currently captures must be disabled when compiling \
202213
a reverse NFA",
203214
),
215+
BuildErrorKind::UnsupportedLookArounds => write!(
216+
f,
217+
"currently look-around sub-expressions cannot be in the pattern \
218+
when compiling a reverse NFA or using the backtracking engine",
219+
),
204220
}
205221
}
206222
}

regex-automata/src/util/determinize/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ pub(crate) fn next(
253253
| thompson::State::Capture { .. } => {}
254254
thompson::State::CheckLookAround { .. }
255255
| thompson::State::WriteLookAround { .. } => {
256-
todo!("check how to handle")
256+
unimplemented!("look-around support in DFA")
257257
}
258258
thompson::State::Match { pattern_id } => {
259259
// Notice here that we are calling the NEW state a match
@@ -405,7 +405,7 @@ pub(crate) fn epsilon_closure(
405405
| thompson::State::Match { .. } => break,
406406
thompson::State::WriteLookAround { .. }
407407
| thompson::State::CheckLookAround { .. } => {
408-
todo!("check how to handle")
408+
unimplemented!("look-around support in DFA")
409409
}
410410
thompson::State::Look { look, next } => {
411411
if !look_have.contains(look) {
@@ -475,7 +475,7 @@ pub(crate) fn add_nfa_states(
475475
}
476476
thompson::State::CheckLookAround { .. }
477477
| thompson::State::WriteLookAround { .. } => {
478-
todo!("check how to handle")
478+
unimplemented!("look-around support in DFA")
479479
}
480480
thompson::State::Union { .. }
481481
| thompson::State::BinaryUnion { .. } => {

regex-automata/tests/nfa/thompson/backtrack/suite.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,17 @@ fn compiler(
108108
if !configure_backtrack_builder(test, &mut builder) {
109109
return Ok(CompiledRegex::skip());
110110
}
111-
let re = builder.build_many(&regexes)?;
111+
let re = match builder.build_many(&regexes) {
112+
Ok(re) => re,
113+
// Due to errors being opaque, we need to check the error message to skip tests with look-arounds
114+
Err(err) => {
115+
if test.compiles() && err.to_string().contains("look-around") {
116+
return Ok(CompiledRegex::skip());
117+
}
118+
119+
return Err(err.into());
120+
}
121+
};
112122
// The backtracker doesn't support lookarounds, so skip if there are any.
113123
if re.get_nfa().lookaround_count() > 0 {
114124
return Ok(CompiledRegex::skip());

0 commit comments

Comments
 (0)