@@ -178,6 +178,8 @@ pub struct GraphemeCursor {
178178 // Set if a call to `prev_boundary` or `next_boundary` was suspended due
179179 // to needing more input.
180180 resuming : bool ,
181+ // One-entry cache laid out as (low, high, category): the grapheme category
181+ // most recently looked up and the inclusive scalar value range it covers.
182+ grapheme_cat_cache : ( u32 , u32 , GraphemeCat ) ,
181183}
182184
183185/// An error return indicating that not enough content was available in the
@@ -276,9 +278,20 @@ impl GraphemeCursor {
276278 pre_context_offset : None ,
277279 ris_count : None ,
278280 resuming : false ,
281+ grapheme_cat_cache : ( 0 , 0 , GraphemeCat :: GC_Control ) ,
279282 }
280283 }
281284
285+ fn grapheme_category ( & mut self , ch : char ) -> GraphemeCat {
286+ use tables:: grapheme as gr;
287+ // If this char isn't within the cached range, update the cache to the
288+ // range that includes it.
289+ if ( ch as u32 ) < self . grapheme_cat_cache . 0 || ( ch as u32 ) > self . grapheme_cat_cache . 1 {
290+ self . grapheme_cat_cache = gr:: grapheme_category ( ch) ;
291+ }
292+ self . grapheme_cat_cache . 2
293+ }
294+
282295 // Not sure I'm gonna keep this, the advantage over new() seems thin.
283296
284297 /// Set the cursor to a new location in the same string.
@@ -349,7 +362,7 @@ impl GraphemeCursor {
349362 self . pre_context_offset = None ;
350363 if self . is_extended && chunk_start + chunk. len ( ) == self . offset {
351364 let ch = chunk. chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
352- if gr :: grapheme_category ( ch) == gr:: GC_Prepend {
365+ if self . grapheme_category ( ch) == gr:: GC_Prepend {
353366 self . decide ( false ) ; // GB9b
354367 return ;
355368 }
@@ -359,7 +372,7 @@ impl GraphemeCursor {
359372 GraphemeState :: Emoji => self . handle_emoji ( chunk, chunk_start) ,
360373 _ => if self . cat_before . is_none ( ) && self . offset == chunk. len ( ) + chunk_start {
361374 let ch = chunk. chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
362- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
375+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
363376 } ,
364377 }
365378 }
@@ -393,7 +406,7 @@ impl GraphemeCursor {
393406 use tables:: grapheme as gr;
394407 let mut ris_count = self . ris_count . unwrap_or ( 0 ) ;
395408 for ch in chunk. chars ( ) . rev ( ) {
396- if gr :: grapheme_category ( ch) != gr:: GC_Regional_Indicator {
409+ if self . grapheme_category ( ch) != gr:: GC_Regional_Indicator {
397410 self . ris_count = Some ( ris_count) ;
398411 self . decide ( ( ris_count % 2 ) == 0 ) ;
399412 return ;
@@ -413,13 +426,13 @@ impl GraphemeCursor {
413426 use tables:: grapheme as gr;
414427 let mut iter = chunk. chars ( ) . rev ( ) ;
415428 if let Some ( ch) = iter. next ( ) {
416- if gr :: grapheme_category ( ch) != gr:: GC_ZWJ {
429+ if self . grapheme_category ( ch) != gr:: GC_ZWJ {
417430 self . decide ( true ) ;
418431 return ;
419432 }
420433 }
421434 for ch in iter {
422- match gr :: grapheme_category ( ch) {
435+ match self . grapheme_category ( ch) {
423436 gr:: GC_Extend => ( ) ,
424437 gr:: GC_Extended_Pictographic => {
425438 self . decide ( false ) ;
@@ -481,7 +494,7 @@ impl GraphemeCursor {
481494 let offset_in_chunk = self . offset - chunk_start;
482495 if self . cat_after . is_none ( ) {
483496 let ch = chunk[ offset_in_chunk..] . chars ( ) . next ( ) . unwrap ( ) ;
484- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
497+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
485498 }
486499 if self . offset == chunk_start {
487500 let mut need_pre_context = true ;
@@ -497,7 +510,7 @@ impl GraphemeCursor {
497510 }
498511 if self . cat_before . is_none ( ) {
499512 let ch = chunk[ ..offset_in_chunk] . chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
500- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
513+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
501514 }
502515 match check_pair ( self . cat_before . unwrap ( ) , self . cat_after . unwrap ( ) ) {
503516 PairResult :: NotBreak => return self . decision ( false ) ,
@@ -553,7 +566,6 @@ impl GraphemeCursor {
553566 /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
554567 /// ```
555568 pub fn next_boundary ( & mut self , chunk : & str , chunk_start : usize ) -> Result < Option < usize > , GraphemeIncomplete > {
556- use tables:: grapheme as gr;
557569 if self . offset == self . len {
558570 return Ok ( None ) ;
559571 }
@@ -562,14 +574,14 @@ impl GraphemeCursor {
562574 loop {
563575 if self . resuming {
564576 if self . cat_after . is_none ( ) {
565- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
577+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
566578 }
567579 } else {
568580 self . offset += ch. len_utf8 ( ) ;
569581 self . state = GraphemeState :: Unknown ;
570582 self . cat_before = self . cat_after . take ( ) ;
571583 if self . cat_before . is_none ( ) {
572- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
584+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
573585 }
574586 if self . cat_before . unwrap ( ) == GraphemeCat :: GC_Regional_Indicator {
575587 self . ris_count = self . ris_count . map ( |c| c + 1 ) ;
@@ -578,7 +590,7 @@ impl GraphemeCursor {
578590 }
579591 if let Some ( next_ch) = iter. next ( ) {
580592 ch = next_ch;
581- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
593+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
582594 } else if self . offset == self . len {
583595 self . decide ( true ) ;
584596 } else {
@@ -629,7 +641,6 @@ impl GraphemeCursor {
629641 /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
630642 /// ```
631643 pub fn prev_boundary ( & mut self , chunk : & str , chunk_start : usize ) -> Result < Option < usize > , GraphemeIncomplete > {
632- use tables:: grapheme as gr;
633644 if self . offset == 0 {
634645 return Ok ( None ) ;
635646 }
@@ -644,7 +655,7 @@ impl GraphemeCursor {
644655 return Err ( GraphemeIncomplete :: PrevChunk ) ;
645656 }
646657 if self . resuming {
647- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
658+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
648659 } else {
649660 self . offset -= ch. len_utf8 ( ) ;
650661 self . cat_after = self . cat_before . take ( ) ;
@@ -654,12 +665,12 @@ impl GraphemeCursor {
654665 }
655666 if let Some ( prev_ch) = iter. next ( ) {
656667 ch = prev_ch;
657- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
668+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
658669 } else if self . offset == 0 {
659670 self . decide ( true ) ;
660671 } else {
661672 self . resuming = true ;
662- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
673+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
663674 return Err ( GraphemeIncomplete :: PrevChunk ) ;
664675 }
665676 }
0 commit comments