@@ -593,16 +593,7 @@ impl char {
593593 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
594594 #[ inline]
595595 pub fn len_utf8 ( self ) -> usize {
596- let code = self as u32 ;
597- if code < MAX_ONE_B {
598- 1
599- } else if code < MAX_TWO_B {
600- 2
601- } else if code < MAX_THREE_B {
602- 3
603- } else {
604- 4
605- }
596+ len_utf8 ( self as u32 )
606597 }
607598
608599 /// Returns the number of 16-bit code units this `char` would need if
@@ -670,36 +661,7 @@ impl char {
670661 #[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
671662 #[ inline]
672663 pub fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
673- let code = self as u32 ;
674- let len = self . len_utf8 ( ) ;
675- match ( len, & mut dst[ ..] ) {
676- ( 1 , [ a, ..] ) => {
677- * a = code as u8 ;
678- }
679- ( 2 , [ a, b, ..] ) => {
680- * a = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
681- * b = ( code & 0x3F ) as u8 | TAG_CONT ;
682- }
683- ( 3 , [ a, b, c, ..] ) => {
684- * a = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
685- * b = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
686- * c = ( code & 0x3F ) as u8 | TAG_CONT ;
687- }
688- ( 4 , [ a, b, c, d, ..] ) => {
689- * a = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
690- * b = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
691- * c = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
692- * d = ( code & 0x3F ) as u8 | TAG_CONT ;
693- }
694- _ => panic ! (
695- "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
696- len,
697- code,
698- dst. len( ) ,
699- ) ,
700- } ;
701- // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
702- unsafe { from_utf8_unchecked_mut ( & mut dst[ ..len] ) }
664+ encode_utf8_raw ( self as u32 , dst)
703665 }
704666
705667 /// Encodes this character as UTF-16 into the provided `u16` buffer,
@@ -1673,3 +1635,60 @@ impl char {
16731635 }
16741636 }
16751637}
1638+
1639+ #[ inline]
1640+ fn len_utf8 ( code : u32 ) -> usize {
1641+ if code < MAX_ONE_B {
1642+ 1
1643+ } else if code < MAX_TWO_B {
1644+ 2
1645+ } else if code < MAX_THREE_B {
1646+ 3
1647+ } else {
1648+ 4
1649+ }
1650+ }
1651+
1652+ /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1653+ /// and then returns the subslice of the buffer that contains the encoded character.
1654+ ///
1655+ /// Unlike `char::encode_utf8`, this method can be called on codepoints in the surrogate range.
1656+ ///
1657+ /// # Panics
1658+ ///
1659+ /// Panics if the buffer is not large enough.
1660+ /// A buffer of length four is large enough to encode any `char`.
1661+ #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1662+ #[ doc( hidden) ]
1663+ #[ inline]
1664+ pub fn encode_utf8_raw ( code : u32 , dst : & mut [ u8 ] ) -> & mut str {
1665+ let len = len_utf8 ( code) ;
1666+ match ( len, & mut dst[ ..] ) {
1667+ ( 1 , [ a, ..] ) => {
1668+ * a = code as u8 ;
1669+ }
1670+ ( 2 , [ a, b, ..] ) => {
1671+ * a = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
1672+ * b = ( code & 0x3F ) as u8 | TAG_CONT ;
1673+ }
1674+ ( 3 , [ a, b, c, ..] ) => {
1675+ * a = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
1676+ * b = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1677+ * c = ( code & 0x3F ) as u8 | TAG_CONT ;
1678+ }
1679+ ( 4 , [ a, b, c, d, ..] ) => {
1680+ * a = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
1681+ * b = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
1682+ * c = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1683+ * d = ( code & 0x3F ) as u8 | TAG_CONT ;
1684+ }
1685+ _ => panic ! (
1686+ "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
1687+ len,
1688+ code,
1689+ dst. len( ) ,
1690+ ) ,
1691+ } ;
1692+ // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
1693+ unsafe { from_utf8_unchecked_mut ( & mut dst[ ..len] ) }
1694+ }
0 commit comments