@@ -83,6 +83,7 @@ enum UWordBoundsState {
8383 Regional ,
8484 FormatExtend ( FormatExtendType ) ,
8585 Zwj ,
86+ Emoji ,
8687}
8788
8889// subtypes for FormatExtend state in UWordBoundsState
@@ -163,6 +164,7 @@ impl<'a> Iterator for UWordBounds<'a> {
163164 wd:: WC_Regional_Indicator => Regional , // rule WB13c
164165 wd:: WC_LF | wd:: WC_Newline => break , // rule WB3a
165166 wd:: WC_ZWJ => Zwj , // rule WB3c
167+ wd:: WC_E_Base | wd:: WC_E_Base_GAZ => Emoji , // rule WB14
166168 _ => {
167169 if let Some ( ncat) = self . get_next_cat ( idx) { // rule WB4
168170 if ncat == wd:: WC_Format || ncat == wd:: WC_Extend || ncat == wd:: WC_ZWJ {
@@ -245,6 +247,13 @@ impl<'a> Iterator for UWordBounds<'a> {
245247 break ;
246248 }
247249 } ,
250+ Emoji => match cat { // rule WB14
251+ wd:: WC_E_Modifier => continue ,
252+ _ => {
253+ take_curr = false ;
254+ break ;
255+ }
256+ } ,
248257 FormatExtend ( t) => match t { // handle FormatExtends depending on what type
249258 RequireNumeric if cat == wd:: WC_Numeric => Numeric , // rule WB11
250259 RequireLetter | AcceptQLetter if cat == wd:: WC_ALetter => Letter , // rule WB7
@@ -355,6 +364,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
355364 saveidx = idx;
356365 FormatExtend ( AcceptQLetter ) // rule WB7a
357366 } ,
367+ wd:: WC_E_Modifier => Emoji , // rule WB14
358368 wd:: WC_CR | wd:: WC_LF | wd:: WC_Newline => {
359369 if state == Start {
360370 if cat == wd:: WC_LF {
@@ -435,6 +445,13 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
435445 break ;
436446 }
437447 } ,
448+ Emoji => match cat { // rule WB14
449+ wd:: WC_E_Base | wd:: WC_E_Base_GAZ => continue ,
450+ _ => {
451+ take_curr = false ;
452+ break ;
453+ }
454+ } ,
438455 FormatExtend ( t) => match t {
439456 RequireNumeric if cat == wd:: WC_Numeric => Numeric , // rule WB12
440457 RequireLetter if cat == wd:: WC_ALetter => Letter , // rule WB6
0 commit comments