Skip to content

Commit ee10459

Browse files
Add error messages and fix pre-filter
We need to disable pre-filters when a regex contains lookarounds. This is because the relevant information for a lookbehind can be before the start of the match.
1 parent d435d2a commit ee10459

File tree

6 files changed

+23
-3
lines changed

6 files changed

+23
-3
lines changed

regex-automata/src/dfa/dense.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5083,6 +5083,12 @@ impl BuildError {
50835083
BuildError { kind: BuildErrorKind::Unsupported(msg) }
50845084
}
50855085

5086+
pub(crate) fn unsupported_lookaround() -> BuildError {
5087+
let msg = "cannot build DFAs for regexes with look-around\
5088+
sub-expressions; use a different regex engine";
5089+
BuildError { kind: BuildErrorKind::Unsupported(msg) }
5090+
}
5091+
50865092
/// Creates a `BuildError` whose kind is `BuildErrorKind::TooManyStates`.
pub(crate) fn too_many_states() -> BuildError {
50875093
BuildError { kind: BuildErrorKind::TooManyStates }
50885094
}

regex-automata/src/dfa/determinize.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@ impl<'a> Runner<'a> {
219219
return Err(BuildError::unsupported_dfa_word_boundary_unicode());
220220
}
221221

222+
if self.nfa.lookaround_count() > 0 {
223+
return Err(BuildError::unsupported_lookaround());
224+
}
225+
222226
// A sequence of "representative" bytes drawn from each equivalence
223227
// class. These representative bytes are fed to the NFA to compute
224228
// state transitions. This allows us to avoid re-computing state

regex-automata/src/hybrid/dfa.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4056,6 +4056,9 @@ impl Builder {
40564056
&self,
40574057
nfa: thompson::NFA,
40584058
) -> Result<DFA, BuildError> {
4059+
if nfa.lookaround_count() > 0 {
4060+
return Err(BuildError::unsupported_lookaround());
4061+
}
40594062
let quitset = self.config.quit_set_from_nfa(&nfa)?;
40604063
let classes = self.config.byte_classes_from_nfa(&nfa, &quitset);
40614064
// Check that we can fit at least a few states into our cache,

regex-automata/src/hybrid/error.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ impl BuildError {
6161
different regex engine";
6262
BuildError { kind: BuildErrorKind::Unsupported(msg) }
6363
}
64+
65+
pub(crate) fn unsupported_lookaround() -> BuildError {
66+
let msg = "cannot build DFAs for regexes with look-around\
67+
sub-expressions; use a different regex engine";
68+
BuildError { kind: BuildErrorKind::Unsupported(msg) }
69+
}
6470
}
6571

6672
#[cfg(feature = "std")]

regex-automata/src/meta/wrappers.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ impl BoundedBacktrackerEngine {
204204
{
205205
if !info.config().get_backtrack()
206206
|| info.config().get_match_kind() != MatchKind::LeftmostFirst
207+
// TODO: remove once look-around support is added.
208+
|| nfa.lookaround_count() > 0
207209
{
208210
return Ok(None);
209211
}

regex-syntax/src/hir/literal.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,8 @@ impl Extractor {
172172
use crate::hir::HirKind::*;
173173

174174
match *hir.kind() {
175-
Empty | Look(_) | LookAround(_) => {
176-
Seq::singleton(self::Literal::exact(vec![]))
177-
}
175+
Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])),
176+
LookAround(_) => Seq::infinite(),
178177
Literal(hir::Literal(ref bytes)) => {
179178
let mut seq =
180179
Seq::singleton(self::Literal::exact(bytes.to_vec()));

0 commit comments

Comments
 (0)