@@ -269,10 +269,10 @@ pub trait CharExt {
269269 fn len_utf8 ( self ) -> usize ;
270270 #[ stable( feature = "core" , since = "1.6.0" ) ]
271271 fn len_utf16 ( self ) -> usize ;
272- #[ stable ( feature = "core " , since = "1.6.0 " ) ]
273- fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> Option < usize > ;
274- #[ stable ( feature = "core " , since = "1.6.0 " ) ]
275- fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> Option < usize > ;
272+ #[ unstable ( feature = "unicode " , issue = "27784 " ) ]
273+ fn encode_utf8 ( self ) -> EncodeUtf8 ;
274+ #[ unstable ( feature = "unicode " , issue = "27784 " ) ]
275+ fn encode_utf16 ( self ) -> EncodeUtf16 ;
276276}
277277
278278#[ stable( feature = "core" , since = "1.6.0" ) ]
@@ -336,75 +336,47 @@ impl CharExt for char {
336336 }
337337
338338 #[ inline]
339- fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> Option < usize > {
340- encode_utf8_raw ( self as u32 , dst)
339+ fn encode_utf8 ( self ) -> EncodeUtf8 {
340+ let code = self as u32 ;
341+ let mut buf = [ 0 ; 4 ] ;
342+ let pos = if code < MAX_ONE_B {
343+ buf[ 3 ] = code as u8 ;
344+ 3
345+ } else if code < MAX_TWO_B {
346+ buf[ 2 ] = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
347+ buf[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
348+ 2
349+ } else if code < MAX_THREE_B {
350+ buf[ 1 ] = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
351+ buf[ 2 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
352+ buf[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
353+ 1
354+ } else {
355+ buf[ 0 ] = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
356+ buf[ 1 ] = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
357+ buf[ 2 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
358+ buf[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
359+ 0
360+ } ;
361+ EncodeUtf8 { buf : buf, pos : pos }
341362 }
342363
343364 #[ inline]
344- fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> Option < usize > {
345- encode_utf16_raw ( self as u32 , dst)
346- }
347- }
348-
349- /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
350- /// and then returns the number of bytes written.
351- ///
352- /// If the buffer is not large enough, nothing will be written into it
353- /// and a `None` will be returned.
354- #[ inline]
355- #[ unstable( feature = "char_internals" ,
356- reason = "this function should not be exposed publicly" ,
357- issue = "0" ) ]
358- #[ doc( hidden) ]
359- pub fn encode_utf8_raw ( code : u32 , dst : & mut [ u8 ] ) -> Option < usize > {
360- // Marked #[inline] to allow llvm optimizing it away
361- if code < MAX_ONE_B && !dst. is_empty ( ) {
362- dst[ 0 ] = code as u8 ;
363- Some ( 1 )
364- } else if code < MAX_TWO_B && dst. len ( ) >= 2 {
365- dst[ 0 ] = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
366- dst[ 1 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
367- Some ( 2 )
368- } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
369- dst[ 0 ] = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
370- dst[ 1 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
371- dst[ 2 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
372- Some ( 3 )
373- } else if dst. len ( ) >= 4 {
374- dst[ 0 ] = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
375- dst[ 1 ] = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
376- dst[ 2 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
377- dst[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
378- Some ( 4 )
379- } else {
380- None
381- }
382- }
383-
384- /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
385- /// and then returns the number of `u16`s written.
386- ///
387- /// If the buffer is not large enough, nothing will be written into it
388- /// and a `None` will be returned.
389- #[ inline]
390- #[ unstable( feature = "char_internals" ,
391- reason = "this function should not be exposed publicly" ,
392- issue = "0" ) ]
393- #[ doc( hidden) ]
394- pub fn encode_utf16_raw ( mut ch : u32 , dst : & mut [ u16 ] ) -> Option < usize > {
395- // Marked #[inline] to allow llvm optimizing it away
396- if ( ch & 0xFFFF ) == ch && !dst. is_empty ( ) {
397- // The BMP falls through (assuming non-surrogate, as it should)
398- dst[ 0 ] = ch as u16 ;
399- Some ( 1 )
400- } else if dst. len ( ) >= 2 {
401- // Supplementary planes break into surrogates.
402- ch -= 0x1_0000 ;
403- dst[ 0 ] = 0xD800 | ( ( ch >> 10 ) as u16 ) ;
404- dst[ 1 ] = 0xDC00 | ( ( ch as u16 ) & 0x3FF ) ;
405- Some ( 2 )
406- } else {
407- None
365+ fn encode_utf16 ( self ) -> EncodeUtf16 {
366+ let mut buf = [ 0 ; 2 ] ;
367+ let mut code = self as u32 ;
368+ let pos = if ( code & 0xFFFF ) == code {
369+ // The BMP falls through (assuming non-surrogate, as it should)
370+ buf[ 1 ] = code as u16 ;
371+ 1
372+ } else {
373+ // Supplementary planes break into surrogates.
374+ code -= 0x1_0000 ;
375+ buf[ 0 ] = 0xD800 | ( ( code >> 10 ) as u16 ) ;
376+ buf[ 1 ] = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
377+ 0
378+ } ;
379+ EncodeUtf16 { buf : buf, pos : pos }
408380 }
409381}
410382
@@ -583,3 +555,80 @@ impl Iterator for EscapeDefault {
583555 }
584556 }
585557}
558+
559+ /// An iterator over `u8` entries represending the UTF-8 encoding of a `char`
560+ /// value.
561+ ///
562+ /// Constructed via the `.encode_utf8()` method on `char`.
563+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
564+ #[ derive( Debug ) ]
565+ pub struct EncodeUtf8 {
566+ buf : [ u8 ; 4 ] ,
567+ pos : usize ,
568+ }
569+
570+ impl EncodeUtf8 {
571+ /// Returns the remaining bytes of this iterator as a slice.
572+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
573+ pub fn as_slice ( & self ) -> & [ u8 ] {
574+ & self . buf [ self . pos ..]
575+ }
576+ }
577+
578+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
579+ impl Iterator for EncodeUtf8 {
580+ type Item = u8 ;
581+
582+ fn next ( & mut self ) -> Option < u8 > {
583+ if self . pos == self . buf . len ( ) {
584+ None
585+ } else {
586+ let ret = Some ( self . buf [ self . pos ] ) ;
587+ self . pos += 1 ;
588+ ret
589+ }
590+ }
591+
592+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
593+ self . as_slice ( ) . iter ( ) . size_hint ( )
594+ }
595+ }
596+
597+ /// An iterator over `u16` entries represending the UTF-16 encoding of a `char`
598+ /// value.
599+ ///
600+ /// Constructed via the `.encode_utf16()` method on `char`.
601+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
602+ #[ derive( Debug ) ]
603+ pub struct EncodeUtf16 {
604+ buf : [ u16 ; 2 ] ,
605+ pos : usize ,
606+ }
607+
608+ impl EncodeUtf16 {
609+ /// Returns the remaining bytes of this iterator as a slice.
610+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
611+ pub fn as_slice ( & self ) -> & [ u16 ] {
612+ & self . buf [ self . pos ..]
613+ }
614+ }
615+
616+
617+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
618+ impl Iterator for EncodeUtf16 {
619+ type Item = u16 ;
620+
621+ fn next ( & mut self ) -> Option < u16 > {
622+ if self . pos == self . buf . len ( ) {
623+ None
624+ } else {
625+ let ret = Some ( self . buf [ self . pos ] ) ;
626+ self . pos += 1 ;
627+ ret
628+ }
629+ }
630+
631+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
632+ self . as_slice ( ) . iter ( ) . size_hint ( )
633+ }
634+ }
0 commit comments