@@ -59,6 +59,7 @@ enum GraphemeState {
5959 HangulLV ,
6060 HangulLVT ,
6161 Regional ,
62+ Zwj ,
6263}
6364
6465impl < ' a > Iterator for Graphemes < ' a > {
@@ -93,13 +94,14 @@ impl<'a> Iterator for Graphemes<'a> {
9394 _ => self . cat . take ( ) . unwrap ( )
9495 } ;
9596
96- if match cat {
97- gr:: GC_Extend => true ,
98- gr:: GC_SpacingMark if self . extended => true ,
99- _ => false
97+ if let Some ( new_state) = match cat {
98+ gr:: GC_Extend => Some ( FindExtend ) , // rule GB9
99+ gr:: GC_SpacingMark if self . extended => Some ( FindExtend ) , // rule GB9a
100+ gr:: GC_ZWJ => Some ( Zwj ) , // rule GB9/GB11
101+ _ => None
100102 } {
101- state = FindExtend ; // rule GB9/GB9a
102- continue ;
103+ state = new_state ;
104+ continue ;
103105 }
104106
105107 state = match state {
@@ -153,7 +155,14 @@ impl<'a> Iterator for Graphemes<'a> {
153155 take_curr = false ;
154156 break ;
155157 }
156- }
158+ } ,
159+ Zwj => match cat { // rule GB11: ZWJ x (GAZ|EBG)
160+ gr:: GC_Glue_After_Zwj | gr:: GC_E_Base_GAZ => continue ,
161+ _ => {
162+ take_curr = false ;
163+ break ;
164+ }
165+ } ,
157166 }
158167 }
159168
@@ -215,6 +224,8 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
215224 Start | FindExtend => match cat {
216225 gr:: GC_Extend => FindExtend ,
217226 gr:: GC_SpacingMark if self . extended => FindExtend ,
227+ gr:: GC_ZWJ => FindExtend ,
228+ gr:: GC_Glue_After_Zwj | gr:: GC_E_Base_GAZ => Zwj ,
218229 gr:: GC_L | gr:: GC_LV | gr:: GC_LVT => HangulL ,
219230 gr:: GC_V => HangulLV ,
220231 gr:: GC_T => HangulLVT ,
@@ -255,6 +266,13 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
255266 take_curr = false ;
256267 break ;
257268 }
269+ } ,
270+ Zwj => match cat { // char to right is (GAZ|EBG)
271+ gr:: GC_ZWJ => continue , // rule GB11: ZWJ x (GAZ|EBG)
272+ _ => {
273+ take_curr = false ;
274+ break ;
275+ }
258276 }
259277 }
260278 }
0 commit comments