@@ -58,6 +58,7 @@ enum GraphemeState {
5858 HangulL ,
5959 HangulLV ,
6060 HangulLVT ,
61+ Prepend ,
6162 Regional ,
6263 Emoji ,
6364 Zwj ,
@@ -123,6 +124,7 @@ impl<'a> Iterator for Graphemes<'a> {
123124 gr:: GC_L => HangulL ,
124125 gr:: GC_LV | gr:: GC_V => HangulLV ,
125126 gr:: GC_LVT | gr:: GC_T => HangulLVT ,
127+ gr:: GC_Prepend if self . extended => Prepend ,
126128 gr:: GC_Regional_Indicator => Regional ,
127129 gr:: GC_E_Base | gr:: GC_E_Base_GAZ => Emoji ,
128130 _ => FindExtend
@@ -155,6 +157,13 @@ impl<'a> Iterator for Graphemes<'a> {
155157 break ;
156158 }
157159 } ,
160+ Prepend => match cat { // rule GB9b
161+ gr:: GC_Control => {
162+ take_curr = false ;
163+ break ;
164+ }
165+ _ => continue
166+ } ,
158167 Regional => match cat { // rule GB12/GB13
159168 gr:: GC_Regional_Indicator => FindExtend ,
160169 _ => {
@@ -276,6 +285,10 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
276285 break ;
277286 }
278287 } ,
288+ Prepend => {
289+ // not used in reverse iteration
290+ unreachable ! ( )
291+ } ,
279292 Regional => { // rule GB12/GB13
280293 // Need to scan backward to find if this is preceded by an odd or even number
281294 // of Regional_Indicator characters.
@@ -340,6 +353,17 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
340353 Some ( cat)
341354 } ;
342355
356+ if self . extended && cat != gr:: GC_Control {
357+ // rule GB9b: include any preceding Prepend characters
358+ for ( i, c) in self . string [ ..idx] . char_indices ( ) . rev ( ) {
359+ // TODO: Cache this to avoid repeated lookups in the common case.
360+ match gr:: grapheme_category ( c) {
361+ gr:: GC_Prepend => idx = i,
362+ _ => break
363+ }
364+ }
365+ }
366+
343367 let retstr = & self . string [ idx..] ;
344368 self . string = & self . string [ ..idx] ;
345369 Some ( retstr)
0 commit comments