@@ -621,8 +621,7 @@ enum NormalizationForm {
621621#[ deriving( Clone ) ]
622622struct NormalizationIterator < ' self > {
623623 priv kind : NormalizationForm ,
624- priv index : uint ,
625- priv string : & ' self str ,
624+ priv iter : CharIterator < ' self > ,
626625 priv buffer : ~[ ( char , u8 ) ] ,
627626 priv sorted : bool
628627}
@@ -650,16 +649,17 @@ impl<'self> Iterator<char> for NormalizationIterator<'self> {
650649 NFKD => char:: decompose_compatible
651650 } ;
652651
653- while !self . sorted && self . index < self . string . len ( ) {
654- let CharRange { ch, next} = self . string . char_range_at ( self . index ) ;
655- self . index = next;
656- do decomposer( ch) |d| {
657- let class = canonical_combining_class ( d) ;
658- if class == 0 && !self . sorted {
659- canonical_sort ( self . buffer ) ;
660- self . sorted = true ;
652+ if !self . sorted {
653+ for ch in self . iter {
654+ do decomposer ( ch) |d| {
655+ let class = canonical_combining_class ( d) ;
656+ if class == 0 && !self . sorted {
657+ canonical_sort ( self . buffer ) ;
658+ self . sorted = true ;
659+ }
660+ self . buffer . push ( ( d, class) ) ;
661661 }
662- self . buffer . push ( ( d , class ) ) ;
662+ if self . sorted { break }
663663 }
664664 }
665665
@@ -678,7 +678,10 @@ impl<'self> Iterator<char> for NormalizationIterator<'self> {
678678 }
679679 }
680680
681- fn size_hint ( & self ) -> ( uint , Option < uint > ) { ( self . string . len ( ) , None ) }
681+ fn size_hint ( & self ) -> ( uint , Option < uint > ) {
682+ let ( lower, _) = self . iter . size_hint ( ) ;
683+ ( lower, None )
684+ }
682685}
683686
684687/// Replace all occurrences of one string with another
@@ -1588,8 +1591,7 @@ impl<'self> StrSlice<'self> for &'self str {
15881591 /// Returns the string in Unicode Normalization Form D (canonical decomposition)
15891592 fn nfd_iter(&self) -> NormalizationIterator<'self> {
15901593 NormalizationIterator {
1591- index: 0,
1592- string: *self,
1594+ iter: self.iter(),
15931595 buffer: ~[],
15941596 sorted: false,
15951597 kind: NFD
@@ -1599,8 +1601,7 @@ impl<'self> StrSlice<'self> for &'self str {
15991601 /// Returns the string in Unicode Normalization Form KD (compatibility decomposition)
16001602 fn nfkd_iter(&self) -> NormalizationIterator<'self> {
16011603 NormalizationIterator {
1602- index: 0,
1603- string: *self,
1604+ iter: self.iter(),
16041605 buffer: ~[],
16051606 sorted: false,
16061607 kind: NFKD
@@ -1672,6 +1673,7 @@ impl<'self> StrSlice<'self> for &'self str {
16721673 if count == end { end_byte = Some(idx); break; }
16731674 count += 1;
16741675 }
1676+ if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
16751677 if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
16761678
16771679 match (begin_byte, end_byte) {
@@ -2659,8 +2661,11 @@ mod tests {
26592661 fn t(a: &str, b: &str, start: uint) {
26602662 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
26612663 }
2664+ t(" ", " ", 0);
26622665 t(" hello", " llo", 2);
26632666 t(" hello", " el", 1);
2667+ t(" αβλ", " β", 1);
2668+ t(" αβλ", " ", 3);
26642669 assert_eq!(" ะเทศไท", " ประเทศไทย中华Việt Nam ".slice_chars(2, 8));
26652670 }
26662671
0 commit comments