@@ -1263,7 +1263,46 @@ impl PikeVM {
12631263 ref mut curr,
12641264 ref mut next,
12651265 ref mut lookaround,
1266+ ref mut curr_lookaround,
1267+ ref mut next_lookaround,
12661268 } = cache;
1269+
1270+ // This initializes the look-behind threads from the start of the input.
1271+ // Note: since capture groups are not allowed inside look-behinds,
1272+ // there won't be any Capture epsilon transitions and hence it is ok to
1273+ // use &mut [] for the slots parameter. The start states must be added in
1274+ // reverse because nested look-behinds have a higher index but must be
1275+ // executed first. NOTE(review): no `.rev()` below; confirm iteration order.
1276+ for look_behind_start in self . nfa . look_behind_starts ( ) {
1277+ self . epsilon_closure (
1278+ stack,
1279+ & mut [ ] ,
1280+ curr_lookaround,
1281+ lookaround,
1282+ input,
1283+ 0 ,
1284+ * look_behind_start,
1285+ ) ;
1286+ }
1287+
1288+ // This brings the look-behind threads into the state they must be for
1289+ // starting at input.start() instead of the beginning. This is
1290+ // necessary for lookbehinds to be able to match outside of the input
1291+ // span.
1292+ for lb_at in 0 ..input. start ( ) {
1293+ self . nexts (
1294+ stack,
1295+ curr_lookaround,
1296+ next_lookaround,
1297+ lookaround,
1298+ input,
1299+ lb_at,
1300+ & mut [ ] ,
1301+ ) ;
1302+ core:: mem:: swap ( curr_lookaround, next_lookaround) ;
1303+ next_lookaround. set . clear ( ) ;
1304+ }
1305+
12671306 let mut hm = None ;
12681307 // Yes, our search doesn't end at input.end(), but includes it. This
12691308 // is necessary because matches are delayed by one byte, just like
@@ -1374,6 +1413,17 @@ impl PikeVM {
13741413 stack, slots, curr, lookaround, input, at, start_id,
13751414 ) ;
13761415 }
1416+ // The lookbehind states must be processed first, since their
1417+ // result must be available for the processing of the main states.
1418+ self . nexts (
1419+ stack,
1420+ curr_lookaround,
1421+ next_lookaround,
1422+ lookaround,
1423+ input,
1424+ at,
1425+ & mut [ ] ,
1426+ ) ;
13771427 if let Some ( pid) =
13781428 self . nexts ( stack, curr, next, lookaround, input, at, slots)
13791429 {
@@ -1387,7 +1437,9 @@ impl PikeVM {
13871437 break ;
13881438 }
13891439 core:: mem:: swap ( curr, next) ;
1440+ core:: mem:: swap ( curr_lookaround, next_lookaround) ;
13901441 next. set . clear ( ) ;
1442+ next_lookaround. set . clear ( ) ;
13911443 at += 1 ;
13921444 }
13931445 instrument ! ( |c| c. eprint( & self . nfa) ) ;
@@ -1442,7 +1494,34 @@ impl PikeVM {
14421494 ref mut curr,
14431495 ref mut next,
14441496 ref mut lookaround,
1497+ ref mut curr_lookaround,
1498+ ref mut next_lookaround,
14451499 } = cache;
1500+
1501+ for look_behind_start in self . nfa . look_behind_starts ( ) {
1502+ self . epsilon_closure (
1503+ stack,
1504+ & mut [ ] ,
1505+ curr_lookaround,
1506+ lookaround,
1507+ input,
1508+ 0 ,
1509+ * look_behind_start,
1510+ ) ;
1511+ }
1512+ for lb_at in 0 ..input. start ( ) {
1513+ self . nexts (
1514+ stack,
1515+ curr_lookaround,
1516+ next_lookaround,
1517+ lookaround,
1518+ input,
1519+ lb_at,
1520+ & mut [ ] ,
1521+ ) ;
1522+ core:: mem:: swap ( curr_lookaround, next_lookaround) ;
1523+ next_lookaround. set . clear ( ) ;
1524+ }
14461525 for at in input. start ( ) ..=input. end ( ) {
14471526 let any_matches = !patset. is_empty ( ) ;
14481527 if curr. set . is_empty ( ) {
@@ -1459,6 +1538,15 @@ impl PikeVM {
14591538 stack, slots, curr, lookaround, input, at, start_id,
14601539 ) ;
14611540 }
1541+ self . nexts (
1542+ stack,
1543+ curr_lookaround,
1544+ next_lookaround,
1545+ lookaround,
1546+ input,
1547+ at,
1548+ & mut [ ] ,
1549+ ) ;
14621550 self . nexts_overlapping (
14631551 stack, curr, next, lookaround, input, at, patset,
14641552 ) ;
@@ -1470,7 +1558,9 @@ impl PikeVM {
14701558 break ;
14711559 }
14721560 core:: mem:: swap ( curr, next) ;
1561+ core:: mem:: swap ( curr_lookaround, next_lookaround) ;
14731562 next. set . clear ( ) ;
1563+ next_lookaround. set . clear ( ) ;
14741564 }
14751565 instrument ! ( |c| c. eprint( & self . nfa) ) ;
14761566 }
@@ -1976,6 +2066,10 @@ pub struct Cache {
19762066 /// haystack at which look-around indexed x holds and which is <= to the
19772067 /// current position".
19782068 lookaround : Vec < Option < NonMaxUsize > > ,
2069+ /// The current active states for look-behind subexpressions.
2070+ curr_lookaround : ActiveStates ,
2071+ /// The next set of states to be explored for look-behind subexpressions.
2072+ next_lookaround : ActiveStates ,
19792073}
19802074
19812075impl Cache {
@@ -1993,6 +2087,8 @@ impl Cache {
19932087 curr : ActiveStates :: new ( re) ,
19942088 next : ActiveStates :: new ( re) ,
19952089 lookaround : vec ! [ None ; re. lookaround_count( ) ] ,
2090+ curr_lookaround : ActiveStates :: new ( re) ,
2091+ next_lookaround : ActiveStates :: new ( re) ,
19962092 }
19972093 }
19982094
@@ -2036,6 +2132,9 @@ impl Cache {
20362132 pub fn reset ( & mut self , re : & PikeVM ) {
20372133 self . curr . reset ( re) ;
20382134 self . next . reset ( re) ;
2135+ self . curr_lookaround . reset ( re) ;
2136+ self . next_lookaround . reset ( re) ;
2137+ self . lookaround = vec ! [ None ; re. lookaround_count( ) ] ;
20392138 }
20402139
20412140 /// Returns the heap memory usage, in bytes, of this cache.
@@ -2063,6 +2162,10 @@ impl Cache {
20632162 self . stack . clear ( ) ;
20642163 self . curr . setup_search ( captures_slot_len) ;
20652164 self . next . setup_search ( captures_slot_len) ;
2165+ // Capture groups are not allowed inside look-arounds, so we
2166+ // set the slot length to zero.
2167+ self . curr_lookaround . setup_search ( 0 ) ;
2168+ self . next_lookaround . setup_search ( 0 ) ;
20662169 }
20672170}
20682171
0 commit comments