@@ -665,8 +665,14 @@ impl Wtf8 {
665665
666666 /// Returns an iterator over the string’s code points, reading from the string’s underlying bytes.
667667 #[ inline]
668- pub fn code_points ( & self ) -> Wtf8CodePoints < ' _ > {
669- Wtf8CodePoints { bytes : self . bytes . iter ( ) }
668+ pub fn code_points ( & self ) -> CodePoints < ' _ > {
669+ CodePoints { bytes : self . bytes . iter ( ) }
670+ }
671+
672+ /// Returns an iterator over the string’s code points and their positions, where each position is the byte offset at which the code point starts.
673+ #[ inline]
674+ pub fn code_point_indices ( & self ) -> CodePointIndices < ' _ > {
675+ CodePointIndices { front_offset : 0 , iter : self . code_points ( ) }
670676 }
671677
672678 /// Access raw bytes of WTF-8 data
@@ -984,11 +990,11 @@ pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! {
984990///
985991/// Created with the method `.code_points()`.
986992#[ derive( Clone ) ]
987- pub struct Wtf8CodePoints < ' a > {
993+ pub struct CodePoints < ' a > {
988994 bytes : slice:: Iter < ' a , u8 > , // iterator over the string’s raw bytes (built from `self.bytes.iter()` in `code_points`)
989995}
990996
991- impl Iterator for Wtf8CodePoints < ' _ > {
997+ impl Iterator for CodePoints < ' _ > {
992998 type Item = CodePoint ;
993999
9941000 #[ inline]
@@ -1004,11 +1010,66 @@ impl Iterator for Wtf8CodePoints<'_> {
10041010 }
10051011}
10061012
1013+ impl < ' a > CodePoints < ' a > {
1014+ /// Views the underlying data as a subslice of the original data.
1015+ #[ inline]
1016+ pub fn as_slice ( & self ) -> & Wtf8 {
1017+ // SAFETY: `CodePoints` is only made from a `Wtf8Str`, which guarantees
1018+ // the iter is valid WTF-8.
1019+ unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . as_slice ( ) ) }
1020+ }
1021+ }
1022+
1023+ /// An iterator over the code points of a WTF-8 string, and their positions (byte offsets from where iteration started).
1024+ ///
1025+ /// Created with the method `.code_point_indices()`.
1026+ #[ derive( Clone ) ]
1027+ pub struct CodePointIndices < ' a > {
1028+ front_offset : usize , // byte offset of the next code point to yield; starts at 0 and grows by the bytes each `next` consumes
1029+ iter : CodePoints < ' a > , // underlying code point iterator, advanced once per `next`
1030+ }
1031+
1032+ impl Iterator for CodePointIndices < ' _ > {
1033+ type Item = ( usize , CodePoint ) ;
1034+
1035+ #[ inline]
1036+ fn next ( & mut self ) -> Option < Self :: Item > {
1037+ let pre_len = self . iter . bytes . len ( ) ;
1038+ match self . iter . next ( ) {
1039+ None => None ,
1040+ Some ( code_point) => {
1041+ let index = self . front_offset ;
1042+ let len = self . iter . bytes . len ( ) ;
1043+ self . front_offset += pre_len - len;
1044+ Some ( ( index, code_point) )
1045+ }
1046+ }
1047+ }
1048+
1049+ #[ inline]
1050+ fn count ( self ) -> usize {
1051+ self . iter . count ( )
1052+ }
1053+
1054+ #[ inline]
1055+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
1056+ self . iter . size_hint ( )
1057+ }
1058+ }
1059+
1060+ impl < ' a > CodePointIndices < ' a > {
1061+ /// Views the underlying data as a subslice of the original data.
1062+ #[ inline]
1063+ pub fn as_slice ( & self ) -> & Wtf8 {
1064+ self . iter . as_slice ( )
1065+ }
1066+ }
1067+
10071068/// Generates a wide character sequence for potentially ill-formed UTF-16.
10081069#[ stable( feature = "rust1" , since = "1.0.0" ) ]
10091070#[ derive( Clone ) ]
10101071pub struct EncodeWide < ' a > {
1011- code_points : Wtf8CodePoints < ' a > ,
1072+ code_points : CodePoints < ' a > , // presumably the source code points the wide characters are generated from — confirm against the Iterator impl
10121073 extra : u16 , // NOTE(review): looks like a buffered extra 16-bit unit; its exact meaning is not visible here — confirm
10131074}
10141075
0 commit comments