@@ -17,7 +17,9 @@ use crate::{
1717 empty, iter,
1818 prefilter:: Prefilter ,
1919 primitives:: { NonMaxUsize , PatternID , SmallIndex , StateID } ,
20- search:: { Anchored , Input , Match , MatchKind , PatternSet , Span } ,
20+ search:: {
21+ Anchored , HalfMatch , Input , Match , MatchKind , PatternSet , Span ,
22+ } ,
2123 sparse_set:: SparseSet ,
2224 } ,
2325} ;
@@ -1094,7 +1096,8 @@ impl PikeVM {
10941096 ) -> Option < PatternID > {
10951097 let utf8empty = self . get_nfa ( ) . has_empty ( ) && self . get_nfa ( ) . is_utf8 ( ) ;
10961098 if !utf8empty {
1097- return self . search_slots_imp ( cache, input, slots) ;
1099+ let hm = self . search_slots_imp ( cache, input, slots) ?;
1100+ return Some ( hm. pattern ( ) ) ;
10981101 }
10991102 // There is an unfortunate special case where if the regex can
11001103 // match the empty string and UTF-8 mode is enabled, the search
@@ -1109,22 +1112,23 @@ impl PikeVM {
11091112 // this case.
11101113 let min = self . get_nfa ( ) . group_info ( ) . implicit_slot_len ( ) ;
11111114 if slots. len ( ) >= min {
1112- return self . search_slots_imp ( cache, input, slots) ;
1115+ let hm = self . search_slots_imp ( cache, input, slots) ?;
1116+ return Some ( hm. pattern ( ) ) ;
11131117 }
11141118 if self . get_nfa ( ) . pattern_len ( ) == 1 {
11151119 let mut enough = [ None , None ] ;
11161120 let got = self . search_slots_imp ( cache, input, & mut enough) ;
11171121 // This is OK because we know `enough` is strictly bigger than
11181122 // `slots`, otherwise this special case isn't reached.
11191123 slots. copy_from_slice ( & enough[ ..slots. len ( ) ] ) ;
1120- return got;
1124+ return got. map ( |hm| hm . pattern ( ) ) ;
11211125 }
11221126 let mut enough = vec ! [ None ; min] ;
11231127 let got = self . search_slots_imp ( cache, input, & mut enough) ;
11241128 // This is OK because we know `enough` is strictly bigger than `slots`,
11251129 // otherwise this special case isn't reached.
11261130 slots. copy_from_slice ( & enough[ ..slots. len ( ) ] ) ;
1127- got
1131+ got. map ( |hm| hm . pattern ( ) )
11281132 }
11291133
11301134 /// This is the actual implementation of `search_slots_imp` that
@@ -1137,30 +1141,17 @@ impl PikeVM {
11371141 cache : & mut Cache ,
11381142 input : & Input < ' _ > ,
11391143 slots : & mut [ Option < NonMaxUsize > ] ,
1140- ) -> Option < PatternID > {
1144+ ) -> Option < HalfMatch > {
11411145 let utf8empty = self . get_nfa ( ) . has_empty ( ) && self . get_nfa ( ) . is_utf8 ( ) ;
1142- let ( pid , end ) = match self . search_imp ( cache, input, slots) {
1146+ let hm = match self . search_imp ( cache, input, slots) {
11431147 None => return None ,
1144- Some ( pid) if !utf8empty => return Some ( pid) ,
1145- Some ( pid) => {
1146- let slot_start = pid. as_usize ( ) * 2 ;
1147- let slot_end = slot_start + 1 ;
1148- // OK because we know we have a match and we know our caller
1149- // provided slots are big enough (which we make true above if
1150- // the caller didn't). Namely, we're only here when 'utf8empty'
1151- // is true, and when that's true, we require slots for every
1152- // pattern.
1153- ( pid, slots[ slot_end] . unwrap ( ) . get ( ) )
1154- }
1148+ Some ( hm) if !utf8empty => return Some ( hm) ,
1149+ Some ( hm) => hm,
11551150 } ;
1156- empty:: skip_splits_fwd ( input, pid, end, |input| {
1157- let pid = match self . search_imp ( cache, input, slots) {
1158- None => return Ok ( None ) ,
1159- Some ( pid) => pid,
1160- } ;
1161- let slot_start = pid. as_usize ( ) * 2 ;
1162- let slot_end = slot_start + 1 ;
1163- Ok ( Some ( ( pid, slots[ slot_end] . unwrap ( ) . get ( ) ) ) )
1151+ empty:: skip_splits_fwd ( input, hm, hm. offset ( ) , |input| {
1152+ Ok ( self
1153+ . search_imp ( cache, input, slots)
1154+ . map ( |hm| ( hm, hm. offset ( ) ) ) )
11641155 } )
11651156 // OK because the PikeVM never errors.
11661157 . unwrap ( )
@@ -1235,7 +1226,7 @@ impl PikeVM {
12351226 cache : & mut Cache ,
12361227 input : & Input < ' _ > ,
12371228 slots : & mut [ Option < NonMaxUsize > ] ,
1238- ) -> Option < PatternID > {
1229+ ) -> Option < HalfMatch > {
12391230 cache. setup_search ( slots. len ( ) ) ;
12401231 if input. is_done ( ) {
12411232 return None ;
@@ -1264,7 +1255,7 @@ impl PikeVM {
12641255 let pre =
12651256 if anchored { None } else { self . get_config ( ) . get_prefilter ( ) } ;
12661257 let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
1267- let mut pid = None ;
1258+ let mut hm = None ;
12681259 // Yes, our search doesn't end at input.end(), but includes it. This
12691260 // is necessary because matches are delayed by one byte, just like
12701261 // how the DFA engines work. The delay is used to handle look-behind
@@ -1283,7 +1274,7 @@ impl PikeVM {
12831274 if curr. set . is_empty ( ) {
12841275 // We have a match and we haven't been instructed to continue
12851276 // on even after finding a match, so we can quit.
1286- if pid . is_some ( ) && !allmatches {
1277+ if hm . is_some ( ) && !allmatches {
12871278 break ;
12881279 }
12891280 // If we're running an anchored search and we've advanced
@@ -1353,7 +1344,7 @@ impl PikeVM {
13531344 // search. If we re-computed it at every position, we would be
13541345 // simulating an unanchored search when we were tasked to perform
13551346 // an anchored search.
1356- if ( !pid . is_some ( ) || allmatches)
1347+ if ( !hm . is_some ( ) || allmatches)
13571348 && ( !anchored || at == input. start ( ) )
13581349 {
13591350 // Since we are adding to the 'curr' active states and since
@@ -1372,22 +1363,23 @@ impl PikeVM {
13721363 let slots = next. slot_table . all_absent ( ) ;
13731364 self . epsilon_closure ( stack, slots, curr, input, at, start_id) ;
13741365 }
1375- if let Some ( x) = self . nexts ( stack, curr, next, input, at, slots) {
1376- pid = Some ( x) ;
1366+ if let Some ( pid) = self . nexts ( stack, curr, next, input, at, slots)
1367+ {
1368+ hm = Some ( HalfMatch :: new ( pid, at) ) ;
13771369 }
13781370 // Unless the caller asked us to return early, we need to mush on
13791371 // to see if we can extend our match. (But note that 'nexts' will
13801372 // quit right after seeing a match when match_kind==LeftmostFirst,
13811373 // as is consistent with leftmost-first match priority.)
1382- if input. get_earliest ( ) && pid . is_some ( ) {
1374+ if input. get_earliest ( ) && hm . is_some ( ) {
13831375 break ;
13841376 }
13851377 core:: mem:: swap ( curr, next) ;
13861378 next. set . clear ( ) ;
13871379 at += 1 ;
13881380 }
13891381 instrument ! ( |c| c. eprint( & self . nfa) ) ;
1390- pid
1382+ hm
13911383 }
13921384
13931385 /// The implementation for the 'which_overlapping_matches' API. Basically,
0 commit comments