@@ -1378,7 +1378,9 @@ impl<'a> Fsm<'a> {
13781378 ( ( empty_flags. end as u8 ) << 1 ) |
13791379 ( ( empty_flags. start_line as u8 ) << 2 ) |
13801380 ( ( empty_flags. end_line as u8 ) << 3 ) |
1381- ( ( state_flags. is_word ( ) as u8 ) << 4 ) )
1381+ ( ( empty_flags. word_boundary as u8 ) << 4 ) |
1382+ ( ( empty_flags. not_word_boundary as u8 ) << 5 ) |
1383+ ( ( state_flags. is_word ( ) as u8 ) << 6 ) )
13821384 as usize
13831385 } ;
13841386 match self . cache . start_states [ flagi] {
@@ -1412,9 +1414,17 @@ impl<'a> Fsm<'a> {
14121414 empty_flags. end = text. len ( ) == 0 ;
14131415 empty_flags. start_line = at == 0 || text[ at - 1 ] == b'\n' ;
14141416 empty_flags. end_line = text. len ( ) == 0 ;
1415- if at > 0 && Byte :: byte ( text[ at - 1 ] ) . is_ascii_word ( ) {
1417+
1418+ let is_word_last = at > 0 && Byte :: byte ( text[ at - 1 ] ) . is_ascii_word ( ) ;
1419+ let is_word = at < text. len ( ) && Byte :: byte ( text[ at] ) . is_ascii_word ( ) ;
1420+ if is_word_last {
14161421 state_flags. set_word ( ) ;
14171422 }
1423+ if is_word == is_word_last {
1424+ empty_flags. not_word_boundary = true ;
1425+ } else {
1426+ empty_flags. word_boundary = true ;
1427+ }
14181428 ( empty_flags, state_flags)
14191429 }
14201430
@@ -1433,9 +1443,18 @@ impl<'a> Fsm<'a> {
14331443 empty_flags. end = text. len ( ) == 0 ;
14341444 empty_flags. start_line = at == text. len ( ) || text[ at] == b'\n' ;
14351445 empty_flags. end_line = text. len ( ) == 0 ;
1436- if at < text. len ( ) && Byte :: byte ( text[ at] ) . is_ascii_word ( ) {
1446+
1447+ let is_word_last =
1448+ at < text. len ( ) && Byte :: byte ( text[ at] ) . is_ascii_word ( ) ;
1449+ let is_word = at > 0 && Byte :: byte ( text[ at - 1 ] ) . is_ascii_word ( ) ;
1450+ if is_word_last {
14371451 state_flags. set_word ( ) ;
14381452 }
1453+ if is_word == is_word_last {
1454+ empty_flags. not_word_boundary = true ;
1455+ } else {
1456+ empty_flags. word_boundary = true ;
1457+ }
14391458 ( empty_flags, state_flags)
14401459 }
14411460
0 commit comments