4040
4141use crate :: cmp;
4242use crate :: cmp:: Ordering ;
43+ use crate :: convert:: TryInto as _;
4344use crate :: fmt;
4445use crate :: slice:: memchr;
4546
@@ -370,11 +371,17 @@ pub struct CharSearcher<'a> {
370371
371372 // safety invariant: `utf8_size` must be at least 1 and less than 5
372373 /// The number of bytes `needle` takes up when encoded in utf8.
373- utf8_size : usize ,
374+ utf8_size : u8 ,
374375 /// A utf8 encoded copy of the `needle`
375376 utf8_encoded : [ u8 ; 4 ] ,
376377}
377378
379+ impl CharSearcher < ' _ > {
380+ fn utf8_size ( & self ) -> usize {
381+ self . utf8_size . into ( )
382+ }
383+ }
384+
378385unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
379386 #[ inline]
380387 fn haystack ( & self ) -> & ' a str {
@@ -414,7 +421,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
414421 let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
415422 // the last byte of the utf8 encoded needle
416423 // SAFETY: we have an invariant that `utf8_size < 5`
417- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
424+ let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size ( ) - 1 ) } ;
418425 if let Some ( index) = memchr:: memchr ( last_byte, bytes) {
419426 // The new finger is the index of the byte we found,
420427 // plus one, since we memchr'd for the last byte of the character.
@@ -434,10 +441,10 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
434441 // find something. When we find something the `finger` will be set
435442 // to a UTF8 boundary.
436443 self . finger += index + 1 ;
437- if self . finger >= self . utf8_size {
438- let found_char = self . finger - self . utf8_size ;
444+ if self . finger >= self . utf8_size ( ) {
445+ let found_char = self . finger - self . utf8_size ( ) ;
439446 if let Some ( slice) = self . haystack . as_bytes ( ) . get ( found_char..self . finger ) {
440- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
447+ if slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] {
441448 return Some ( ( found_char, self . finger ) ) ;
442449 }
443450 }
@@ -482,7 +489,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
482489 let bytes = haystack. get ( self . finger ..self . finger_back ) ?;
483490 // the last byte of the utf8 encoded needle
484491 // SAFETY: we have an invariant that `utf8_size < 5`
485- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
492+ let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size ( ) - 1 ) } ;
486493 if let Some ( index) = memchr:: memrchr ( last_byte, bytes) {
487494 // we searched a slice that was offset by self.finger,
488495 // add self.finger to recoup the original index
@@ -493,14 +500,14 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
493500 // char in the paradigm of reverse iteration). For
494501 // multibyte chars we need to skip down by the number of more
495502 // bytes they have than ASCII
496- let shift = self . utf8_size - 1 ;
503+ let shift = self . utf8_size ( ) - 1 ;
497504 if index >= shift {
498505 let found_char = index - shift;
499- if let Some ( slice) = haystack. get ( found_char..( found_char + self . utf8_size ) ) {
500- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
506+ if let Some ( slice) = haystack. get ( found_char..( found_char + self . utf8_size ( ) ) ) {
507+ if slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] {
501508 // move finger to before the character found (i.e., at its start index)
502509 self . finger_back = found_char;
503- return Some ( ( self . finger_back , self . finger_back + self . utf8_size ) ) ;
510+ return Some ( ( self . finger_back , self . finger_back + self . utf8_size ( ) ) ) ;
504511 }
505512 }
506513 }
@@ -542,7 +549,12 @@ impl<'a> Pattern<'a> for char {
542549 #[ inline]
543550 fn into_searcher ( self , haystack : & ' a str ) -> Self :: Searcher {
544551 let mut utf8_encoded = [ 0 ; 4 ] ;
545- let utf8_size = self . encode_utf8 ( & mut utf8_encoded) . len ( ) ;
552+ let utf8_size = self
553+ . encode_utf8 ( & mut utf8_encoded)
554+ . len ( )
555+ . try_into ( )
556+ . expect ( "char len should be less than 255" ) ;
557+
546558 CharSearcher {
547559 haystack,
548560 finger : 0 ,
0 commit comments