@@ -80,9 +80,9 @@ enum UWordBoundsState {
8080 Numeric ,
8181 Katakana ,
8282 ExtendNumLet ,
83- Regional ,
83+ Regional ( /* half */ bool ) ,
8484 FormatExtend ( FormatExtendType ) ,
85- Zwj ( bool ) ,
85+ Zwj ( /* tainted */ bool ) ,
8686 Emoji ,
8787}
8888
@@ -184,7 +184,7 @@ impl<'a> Iterator for UWordBounds<'a> {
184184 wd:: WC_Numeric => Numeric , // rule WB8, WB10, WB12, WB13a
185185 wd:: WC_Katakana => Katakana , // rule WB13, WB13a
186186 wd:: WC_ExtendNumLet => ExtendNumLet , // rule WB13a, WB13b
187- wd:: WC_Regional_Indicator => Regional , // rule WB13c
187+ wd:: WC_Regional_Indicator => Regional ( /* half = */ true ) , // rule WB13c
188188 wd:: WC_LF | wd:: WC_Newline => break , // rule WB3a
189189 wd:: WC_ZWJ => Zwj ( false ) , // rule WB3c
190190 wd:: WC_E_Base | wd:: WC_E_Base_GAZ => Emoji , // rule WB14
@@ -269,8 +269,15 @@ impl<'a> Iterator for UWordBounds<'a> {
269269 break ;
270270 }
271271 } ,
272- Regional => match cat {
273- wd:: WC_Regional_Indicator => Regional , // rule WB13c
272+ Regional ( false ) => {
273+ // if it reaches here we've gone too far,
274+ // a full flag can only compose with ZWJ/Extend/Format
275+ // following it.
276+ take_curr = false ;
277+ break ;
278+ }
279+ Regional ( /* half */ true ) => match cat {
280+ wd:: WC_Regional_Indicator => Regional ( false ) , // rule WB13c
274281 _ => {
275282 take_curr = false ;
276283 break ;
@@ -385,7 +392,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
385392 wd:: WC_Numeric => Numeric , // rule WB8, WB9, WB11, WB13b
386393 wd:: WC_Katakana => Katakana , // rule WB13, WB13b
387394 wd:: WC_ExtendNumLet => ExtendNumLet , // rule WB13a
388- wd:: WC_Regional_Indicator => Regional , // rule WB13c
395+ wd:: WC_Regional_Indicator => Regional ( true ) , // rule WB13c
389396 wd:: WC_Glue_After_Zwj | wd:: WC_E_Base_GAZ => Zwj ( false ) , // rule WB3c
390397 // rule WB4:
391398 wd:: WC_Extend | wd:: WC_Format | wd:: WC_ZWJ => FormatExtend ( AcceptAny ) ,
@@ -467,8 +474,8 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
467474 break ;
468475 }
469476 } ,
470- Regional => match cat {
471- wd:: WC_Regional_Indicator => Regional , // rule WB13c
477+ Regional ( _ ) => match cat {
478+ wd:: WC_Regional_Indicator => Regional ( true ) , // rule WB13c
472479 _ => {
473480 take_curr = false ;
474481 break ;
0 commit comments