@@ -891,6 +891,7 @@ impl PikeVM {
891891 cache : & ' c mut Cache ,
892892 input : I ,
893893 ) -> FindMatches < ' r , ' c , ' h > {
894+ cache. keep_lookaround_state ( true ) ;
894895 let caps = Captures :: matches ( self . get_nfa ( ) . group_info ( ) . clone ( ) ) ;
895896 let it = iter:: Searcher :: new ( input. into ( ) ) ;
896897 FindMatches { re : self , cache, caps, it }
@@ -934,6 +935,7 @@ impl PikeVM {
934935 cache : & ' c mut Cache ,
935936 input : I ,
936937 ) -> CapturesMatches < ' r , ' c , ' h > {
938+ cache. keep_lookaround_state ( true ) ;
937939 let caps = self . create_captures ( ) ;
938940 let it = iter:: Searcher :: new ( input. into ( ) ) ;
939941 CapturesMatches { re : self , cache, caps, it }
@@ -1265,42 +1267,48 @@ impl PikeVM {
12651267 ref mut lookaround,
12661268 ref mut curr_lookaround,
12671269 ref mut next_lookaround,
1270+ ref mut match_lookaround,
1271+ ref keep_lookaround_state,
12681272 } = cache;
12691273
1270- // This initializes the look-behind threads from the start of the input
1271- // Note: since capture groups are not allowed inside look-behinds,
1272- // there won't be any Capture epsilon transitions and hence it is ok to
1273- // use &mut [] for the slots parameter. We need to add the start states
1274- // in reverse because nested look-behinds have a higher index but must
1275- // be executed first.
1276- for look_behind_start in self . nfa . look_behind_starts ( ) {
1277- self . epsilon_closure (
1278- stack,
1279- & mut [ ] ,
1280- curr_lookaround,
1281- lookaround,
1282- input,
1283- 0 ,
1284- * look_behind_start,
1285- ) ;
1286- }
1274+ if let Some ( active) = match_lookaround {
1275+ * curr_lookaround = active. clone ( ) ;
1276+ } else {
1277+ // This initializes the look-behind threads from the start of the input
1278+ // Note: since capture groups are not allowed inside look-behinds,
1279+ // there won't be any Capture epsilon transitions and hence it is ok to
1280+ // use &mut [] for the slots parameter. We need to add the start states
1281+ // in reverse because nested look-behinds have a higher index but must
1282+ // be executed first.
1283+ for look_behind_start in self . nfa . look_behind_starts ( ) {
1284+ self . epsilon_closure (
1285+ stack,
1286+ & mut [ ] ,
1287+ curr_lookaround,
1288+ lookaround,
1289+ input,
1290+ 0 ,
1291+ * look_behind_start,
1292+ ) ;
1293+ }
12871294
1288- // This brings the look-behind threads into the state they must be for
1289- // starting at input.start() instead of the beginning. This is
1290- // necessary for look-behinds to be able to match outside of the input
1291- // span.
1292- for lb_at in 0 ..input. start ( ) {
1293- self . nexts (
1294- stack,
1295- curr_lookaround,
1296- next_lookaround,
1297- lookaround,
1298- input,
1299- lb_at,
1300- & mut [ ] ,
1301- ) ;
1302- core:: mem:: swap ( curr_lookaround, next_lookaround) ;
1303- next_lookaround. set . clear ( ) ;
1295+ // This brings the look-behind threads into the state they must be for
1296+ // starting at input.start() instead of the beginning. This is
1297+ // necessary for look-behinds to be able to match outside of the input
1298+ // span.
1299+ for lb_at in 0 ..input. start ( ) {
1300+ self . nexts (
1301+ stack,
1302+ curr_lookaround,
1303+ next_lookaround,
1304+ lookaround,
1305+ input,
1306+ lb_at,
1307+ & mut [ ] ,
1308+ ) ;
1309+ core:: mem:: swap ( curr_lookaround, next_lookaround) ;
1310+ next_lookaround. set . clear ( ) ;
1311+ }
13041312 }
13051313
13061314 let mut hm = None ;
@@ -1428,6 +1436,9 @@ impl PikeVM {
14281436 self . nexts ( stack, curr, next, lookaround, input, at, slots)
14291437 {
14301438 hm = Some ( HalfMatch :: new ( pid, at) ) ;
1439+ if * keep_lookaround_state {
1440+ * match_lookaround = Some ( curr_lookaround. clone ( ) ) ;
1441+ }
14311442 }
14321443 // Unless the caller asked us to return early, we need to mush on
14331444 // to see if we can extend our match. (But note that 'nexts' will
@@ -1496,6 +1507,10 @@ impl PikeVM {
14961507 ref mut lookaround,
14971508 ref mut curr_lookaround,
14981509 ref mut next_lookaround,
1510+ // It makes no sense to keep any look-behind state for this version of
1511+ // the search, since the caller receives no information about
1512+ // where the search ended.
1513+ ..
14991514 } = cache;
15001515
15011516 for look_behind_start in self . nfa . look_behind_starts ( ) {
@@ -1989,10 +2004,14 @@ impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> {
19892004 * self ;
19902005 // 'advance' converts errors into panics, which is OK here because
19912006 // the PikeVM can never return an error.
1992- it. advance ( |input| {
2007+ let result = it. advance ( |input| {
19932008 re. search ( cache, input, caps) ;
19942009 Ok ( caps. get_match ( ) )
1995- } )
2010+ } ) ;
2011+ if result. is_none ( ) {
2012+ cache. keep_lookaround_state ( false ) ;
2013+ }
2014+ result
19962015 }
19972016}
19982017
@@ -2034,6 +2053,7 @@ impl<'r, 'c, 'h> Iterator for CapturesMatches<'r, 'c, 'h> {
20342053 if caps. is_match ( ) {
20352054 Some ( caps. clone ( ) )
20362055 } else {
2056+ cache. keep_lookaround_state ( false ) ;
20372057 None
20382058 }
20392059 }
@@ -2070,6 +2090,12 @@ pub struct Cache {
20702090 curr_lookaround : ActiveStates ,
20712091 /// The next set of states to be explored for look-behind subexpressions.
20722092 next_lookaround : ActiveStates ,
2093+ /// The active set of states when a match was found. This is needed
2094+ /// to resume a search without recomputing look-behind subexpressions.
2095+ match_lookaround : Option < ActiveStates > ,
2096+ /// When true, use the states of `match_lookaround` to initialize a search,
2097+ /// otherwise recompute from the beginning of the haystack.
2098+ keep_lookaround_state : bool ,
20732099}
20742100
20752101impl Cache {
@@ -2089,6 +2115,8 @@ impl Cache {
20892115 lookaround : vec ! [ None ; re. lookaround_count( ) ] ,
20902116 curr_lookaround : ActiveStates :: new ( re) ,
20912117 next_lookaround : ActiveStates :: new ( re) ,
2118+ match_lookaround : None ,
2119+ keep_lookaround_state : false ,
20922120 }
20932121 }
20942122
@@ -2135,6 +2163,24 @@ impl Cache {
21352163 self . curr_lookaround . reset ( re) ;
21362164 self . next_lookaround . reset ( re) ;
21372165 self . lookaround = vec ! [ None ; re. lookaround_count( ) ] ;
2166+ self . match_lookaround = None ;
2167+ self . keep_lookaround_state = false ;
2168+ }
2169+
2170+ /// Set this cache to keep the state of look-behind assertions upon a
2171+ /// match being found.
2172+ ///
2173+ /// This must only be called with a value of `true` when a new search is
2174+ /// started at the end of a previously found match, otherwise the result
2175+ /// of any search after this call will most likely be wrong.
2176+ ///
2177+ /// Calling this function with a value of `false` will clear any previously
2178+ /// stored look-behind state.
2179+ pub fn keep_lookaround_state ( & mut self , keep : bool ) {
2180+ self . keep_lookaround_state = keep;
2181+ if !keep {
2182+ self . match_lookaround = None ;
2183+ }
21382184 }
21392185
21402186 /// Returns the heap memory usage, in bytes, of this cache.
@@ -2143,11 +2189,16 @@ impl Cache {
21432189 /// compute that, use `std::mem::size_of::<Cache>()`.
21442190 pub fn memory_usage ( & self ) -> usize {
21452191 use core:: mem:: size_of;
2192+ let match_lookaround_memory = match & self . match_lookaround {
2193+ Some ( ml) => ml. memory_usage ( ) ,
2194+ None => 0 ,
2195+ } ;
21462196 ( self . stack . len ( ) * size_of :: < FollowEpsilon > ( ) )
21472197 + self . curr . memory_usage ( )
21482198 + self . next . memory_usage ( )
21492199 + self . curr_lookaround . memory_usage ( )
21502200 + self . next_lookaround . memory_usage ( )
2201+ + match_lookaround_memory
21512202 }
21522203
21532204 /// Clears this cache. This should be called at the start of every search
0 commit comments