@@ -711,11 +711,6 @@ pub struct Compiler {
711711 /// State used for caching common suffixes when compiling reverse UTF-8
712712 /// automata (for Unicode character classes).
713713 utf8_suffix : RefCell < Utf8SuffixMap > ,
714- /// Top level alternation state which is used to run all look-around
715- /// assertion checks in lockstep with the main expression. Each look-around
716- /// expression is compiled to a set of states that is patched into this
717- /// state, and this state is updated on each new pattern being compiled.
718- lookaround_alt : RefCell < Option < StateID > > ,
719714 /// The next index to use for a look-around expression.
720715 lookaround_index : RefCell < SmallIndex > ,
721716}
@@ -730,7 +725,6 @@ impl Compiler {
730725 utf8_state : RefCell :: new ( Utf8State :: new ( ) ) ,
731726 trie_state : RefCell :: new ( RangeTrie :: new ( ) ) ,
732727 utf8_suffix : RefCell :: new ( Utf8SuffixMap :: new ( 1000 ) ) ,
733- lookaround_alt : RefCell :: new ( None ) ,
734728 lookaround_index : RefCell :: new ( SmallIndex :: ZERO ) ,
735729 }
736730 }
@@ -993,32 +987,11 @@ impl Compiler {
993987
994988 let compiled = self . c_alt_iter ( exprs. iter ( ) . map ( |e| {
995989 let _ = self . start_pattern ( ) ?;
996- let has_lookarounds =
997- ( e. borrow ( ) as & Hir ) . properties ( ) . contains_lookaround_expr ( ) ;
998- let mut top_level_alt = if has_lookarounds {
999- self . add_union ( ) ?
1000- } else {
1001- StateID :: ZERO
1002- } ;
1003- if has_lookarounds {
1004- let lookaround_prefix =
1005- self . c_at_least ( & Hir :: dot ( hir:: Dot :: AnyByte ) , false , 0 ) ?;
1006- let lookaround_alt = self . add_union ( ) ?;
1007- self . patch ( lookaround_prefix. end , lookaround_alt) ?;
1008- self . patch ( top_level_alt, lookaround_prefix. start ) ?;
1009- self . lookaround_alt . borrow_mut ( ) . replace ( lookaround_alt) ;
1010- }
1011990 let one = self . c_cap ( 0 , None , e. borrow ( ) ) ?;
1012991 let match_state_id = self . add_match ( ) ?;
1013992 self . patch ( one. end , match_state_id) ?;
1014- if has_lookarounds {
1015- self . patch ( top_level_alt, one. start ) ?;
1016- } else {
1017- top_level_alt = one. start ;
1018- }
1019- let _ = self . finish_pattern ( top_level_alt) ?;
1020- self . lookaround_alt . borrow_mut ( ) . take ( ) ;
1021- Ok ( ThompsonRef { start : top_level_alt, end : match_state_id } )
993+ let _ = self . finish_pattern ( one. start ) ?;
994+ Ok ( ThompsonRef { start : one. start , end : match_state_id } )
1022995 } ) ) ?;
1023996 self . patch ( unanchored_prefix. end , compiled. start ) ?;
1024997 let nfa = self
@@ -1052,25 +1025,25 @@ impl Compiler {
10521025 & self ,
10531026 lookaround : & LookAround ,
10541027 ) -> Result < ThompsonRef , BuildError > {
1055- let sub = self . c ( lookaround. sub ( ) ) ?;
1056- let pos = match lookaround {
1057- LookAround :: NegativeLookBehind ( _) => false ,
1058- LookAround :: PositiveLookBehind ( _) => true ,
1059- } ;
10601028 let idx = * self . lookaround_index . borrow ( ) ;
10611029 * self . lookaround_index . borrow_mut ( ) = SmallIndex :: new ( idx. one_more ( ) )
10621030 . map_err ( |e| {
10631031 BuildError :: too_many_lookarounds ( e. attempted ( ) as usize )
10641032 } ) ?;
1033+ let pos = match lookaround {
1034+ LookAround :: NegativeLookBehind ( _) => false ,
1035+ LookAround :: PositiveLookBehind ( _) => true ,
1036+ } ;
10651037 let check = self . add_check_lookaround ( idx, pos) ?;
1038+
1039+ let unanchored =
1040+ self . c_at_least ( & Hir :: dot ( hir:: Dot :: AnyByte ) , false , 0 ) ?;
1041+
1042+ let sub = self . c ( lookaround. sub ( ) ) ?;
10661043 let write = self . add_write_lookaround ( idx) ?;
1044+ self . patch ( unanchored. end , sub. start ) ?;
10671045 self . patch ( sub. end , write) ?;
1068- self . patch (
1069- self . lookaround_alt
1070- . borrow ( )
1071- . expect ( "Cannot compile look-around outside pattern" ) ,
1072- sub. start ,
1073- ) ?;
1046+ self . builder . borrow_mut ( ) . start_look_behind ( unanchored. start ) ;
10741047 Ok ( ThompsonRef { start : check, end : check } )
10751048 }
10761049
@@ -2169,13 +2142,12 @@ mod tests {
21692142 & [
21702143 s_bin_union( 2 , 1 ) ,
21712144 s_range( 0 , 255 , 0 ) ,
2172- s_bin_union ( 3 , 6 ) ,
2145+ s_check_lookaround ( 0 , true , 7 ) ,
21732146 s_bin_union( 5 , 4 ) ,
21742147 s_range( 0 , 255 , 3 ) ,
2175- s_look( Look :: Start , 7 ) ,
2176- s_check_lookaround( 0 , true , 8 ) ,
2148+ s_look( Look :: Start , 6 ) ,
21772149 s_write_lookaround( 0 ) ,
2178- s_byte( b'a' , 9 ) ,
2150+ s_byte( b'a' , 8 ) ,
21792151 s_match( 0 )
21802152 ]
21812153 ) ;
@@ -2310,28 +2282,27 @@ mod tests {
23102282 assert_eq ! (
23112283 build( r"(?<=a)" ) . states( ) ,
23122284 & [
2313- s_bin_union ( 1 , 4 ) ,
2285+ s_check_lookaround ( 0 , true , 5 ) ,
23142286 s_bin_union( 3 , 2 ) ,
23152287 s_range( b'\x00' , b'\xFF' , 1 ) ,
2316- s_byte( b'a' , 5 ) ,
2317- s_check_lookaround( 0 , true , 6 ) ,
2288+ s_byte( b'a' , 4 ) ,
23182289 s_write_lookaround( 0 ) ,
23192290 s_match( 0 )
23202291 ]
23212292 ) ;
23222293 assert_eq ! (
23232294 build( r"(?<=a(?<!b))" ) . states( ) ,
23242295 & [
2325- s_bin_union ( 1 , 8 ) ,
2296+ s_check_lookaround ( 0 , true , 10 ) ,
23262297 s_bin_union( 3 , 2 ) ,
23272298 s_range( b'\x00' , b'\xFF' , 1 ) ,
2328- s_bin_union( 5 , 4 ) ,
2329- s_byte( b'a' , 6 ) ,
2330- s_byte( b'b' , 7 ) ,
2331- s_check_lookaround( 0 , false , 9 ) ,
2332- s_write_lookaround( 0 ) ,
2333- s_check_lookaround( 1 , true , 10 ) ,
2299+ s_byte( b'a' , 4 ) ,
2300+ s_check_lookaround( 1 , false , 9 ) ,
2301+ s_bin_union( 7 , 6 ) ,
2302+ s_range( b'\x00' , b'\xFF' , 5 ) ,
2303+ s_byte( b'b' , 8 ) ,
23342304 s_write_lookaround( 1 ) ,
2305+ s_write_lookaround( 0 ) ,
23352306 s_match( 0 )
23362307 ]
23372308 ) ;
0 commit comments