1313use slice;
1414use str:: from_utf8_unchecked_mut;
1515use super :: * ;
16- use super :: CharExt as C ;
1716use super :: printable:: is_printable;
1817use unicode:: tables:: { conversions, derived_property, general_category, property} ;
1918
// NOTE(review): every line of this `impl CharExt for char` block carries a `-`
// diff marker, i.e. it is the removed side of the change. The same method
// bodies reappear further down as `+` lines inside `impl char`, where they
// replace the former `C::method(self, ..)` forwarding calls.
20- #[ stable( feature = "core" , since = "1.6.0" ) ]
21- impl CharExt for char {
// True exactly when `to_digit` produces a value for this radix.
22- #[ inline]
23- fn is_digit ( self , radix : u32 ) -> bool {
24- self . to_digit ( radix) . is_some ( )
25- }
26-
// Panics when `radix > 36`; otherwise maps '0'-'9' to 0-9 and ASCII letters
// (either case) to 10-35, returning `None` unless the value is below `radix`.
27- #[ inline]
28- fn to_digit ( self , radix : u32 ) -> Option < u32 > {
29- if radix > 36 {
30- panic ! ( "to_digit: radix is too high (maximum 36)" ) ;
31- }
32- let val = match self {
33- '0' ... '9' => self as u32 - '0' as u32 ,
34- 'a' ... 'z' => self as u32 - 'a' as u32 + 10 ,
35- 'A' ... 'Z' => self as u32 - 'A' as u32 + 10 ,
36- _ => return None ,
37- } ;
38- if val < radix { Some ( val) }
39- else { None }
40- }
41-
// Builds the `EscapeUnicode` iterator state, precomputing the index of the
// most significant hex digit of the code point.
42- #[ inline]
43- fn escape_unicode ( self ) -> EscapeUnicode {
44- let c = self as u32 ;
45-
46- // or-ing 1 ensures that for c==0 the code computes that one
47- // digit should be printed and (which is the same) avoids the
48- // (31 - 32) underflow
49- let msb = 31 - ( c | 1 ) . leading_zeros ( ) ;
50-
51- // the index of the most significant hex digit
52- let ms_hex_digit = msb / 4 ;
53- EscapeUnicode {
54- c : self ,
55- state : EscapeUnicodeState :: Backslash ,
56- hex_digit_idx : ms_hex_digit as usize ,
57- }
58- }
59-
// Arm order matters: the backslash/quote arm must precede the
// '\x20'...'\x7e' range, which also contains those characters.
60- #[ inline]
61- fn escape_default ( self ) -> EscapeDefault {
62- let init_state = match self {
63- '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
64- '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
65- '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
66- '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
67- '\x20' ... '\x7e' => EscapeDefaultState :: Char ( self ) ,
68- _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) )
69- } ;
70- EscapeDefault { state : init_state }
71- }
72-
// Same shape as `escape_default`, except that the pass-through test is
// `is_printable(c)` instead of the fixed '\x20'...'\x7e' range.
73- #[ inline]
74- fn escape_debug ( self ) -> EscapeDebug {
75- let init_state = match self {
76- '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
77- '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
78- '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
79- '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
80- c if is_printable ( c) => EscapeDefaultState :: Char ( c) ,
81- c => EscapeDefaultState :: Unicode ( c. escape_unicode ( ) ) ,
82- } ;
83- EscapeDebug ( EscapeDefault { state : init_state } )
84- }
85-
// Returns 1-4 by comparing the code point against the MAX_*_B thresholds
// (constants defined outside this view).
86- #[ inline]
87- fn len_utf8 ( self ) -> usize {
88- let code = self as u32 ;
89- if code < MAX_ONE_B {
90- 1
91- } else if code < MAX_TWO_B {
92- 2
93- } else if code < MAX_THREE_B {
94- 3
95- } else {
96- 4
97- }
98- }
99-
// Returns 1 when the code point fits in 16 bits, 2 otherwise.
100- #[ inline]
101- fn len_utf16 ( self ) -> usize {
102- let ch = self as u32 ;
103- if ( ch & 0xFFFF ) == ch { 1 } else { 2 }
104- }
105-
// Writes the encoding into the front of `dst` and returns that prefix as
// `&mut str`; panics when `dst` is too short for this character.
// Every `get_unchecked_mut(i)` sits in a branch whose condition has already
// checked that `dst` holds more than `i` bytes.
106- #[ inline]
107- fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
108- let code = self as u32 ;
109- unsafe {
110- let len =
111- if code < MAX_ONE_B && !dst. is_empty ( ) {
112- * dst. get_unchecked_mut ( 0 ) = code as u8 ;
113- 1
114- } else if code < MAX_TWO_B && dst. len ( ) >= 2 {
115- * dst. get_unchecked_mut ( 0 ) = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
116- * dst. get_unchecked_mut ( 1 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
117- 2
118- } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
119- * dst. get_unchecked_mut ( 0 ) = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
120- * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
121- * dst. get_unchecked_mut ( 2 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
122- 3
123- } else if dst. len ( ) >= 4 {
124- * dst. get_unchecked_mut ( 0 ) = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
125- * dst. get_unchecked_mut ( 1 ) = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
126- * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
127- * dst. get_unchecked_mut ( 3 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
128- 4
129- } else {
130- panic ! ( "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
131- from_u32_unchecked( code) . len_utf8( ) ,
132- code,
133- dst. len( ) )
134- } ;
135- from_utf8_unchecked_mut ( dst. get_unchecked_mut ( ..len) )
136- }
137- }
138-
// Writes one unit (code point fits in 16 bits) or a surrogate pair into the
// front of `dst` and returns that prefix; panics when `dst` is too short.
// The `code -= 0x1_0000` branch is only reached for code points above 0xFFFF:
// a 16-bit code point with a non-empty buffer takes the first branch, and with
// an empty buffer `dst.len() >= 2` is false.
139- #[ inline]
140- fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
141- let mut code = self as u32 ;
142- unsafe {
143- if ( code & 0xFFFF ) == code && !dst. is_empty ( ) {
144- // The BMP falls through (assuming non-surrogate, as it should)
145- * dst. get_unchecked_mut ( 0 ) = code as u16 ;
146- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 1 )
147- } else if dst. len ( ) >= 2 {
148- // Supplementary planes break into surrogates.
149- code -= 0x1_0000 ;
150- * dst. get_unchecked_mut ( 0 ) = 0xD800 | ( ( code >> 10 ) as u16 ) ;
151- * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
152- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
153- } else {
154- panic ! ( "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
155- from_u32_unchecked( code) . len_utf16( ) ,
156- code,
157- dst. len( ) )
158- }
159- }
160- }
161- }
162-
16319#[ lang = "char" ]
16420impl char {
16521 /// Checks if a `char` is a digit in the given radix.
@@ -211,7 +67,7 @@ impl char {
21167 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
21268 #[ inline]
21369 pub fn is_digit ( self , radix : u32 ) -> bool {
214- C :: is_digit ( self , radix)
70+ self . to_digit ( radix) . is_some ( )
21571 }
21672
21773 /// Converts a `char` to a digit in the given radix.
@@ -265,7 +121,17 @@ impl char {
265121 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
266122 #[ inline]
267123 pub fn to_digit ( self , radix : u32 ) -> Option < u32 > {
268- C :: to_digit ( self , radix)
124+ if radix > 36 {
125+ panic ! ( "to_digit: radix is too high (maximum 36)" ) ;
126+ }
127+ let val = match self {
128+ '0' ... '9' => self as u32 - '0' as u32 ,
129+ 'a' ... 'z' => self as u32 - 'a' as u32 + 10 ,
130+ 'A' ... 'Z' => self as u32 - 'A' as u32 + 10 ,
131+ _ => return None ,
132+ } ;
133+ if val < radix { Some ( val) }
134+ else { None }
269135 }
270136
271137 /// Returns an iterator that yields the hexadecimal Unicode escape of a
@@ -305,7 +171,20 @@ impl char {
305171 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
306172 #[ inline]
307173 pub fn escape_unicode ( self ) -> EscapeUnicode {
308- C :: escape_unicode ( self )
174+ let c = self as u32 ;
175+
176+ // or-ing 1 ensures that for c==0 the code computes that one
177+ // digit should be printed and (which is the same) avoids the
178+ // (31 - 32) underflow
179+ let msb = 31 - ( c | 1 ) . leading_zeros ( ) ;
180+
181+ // the index of the most significant hex digit
182+ let ms_hex_digit = msb / 4 ;
183+ EscapeUnicode {
184+ c : self ,
185+ state : EscapeUnicodeState :: Backslash ,
186+ hex_digit_idx : ms_hex_digit as usize ,
187+ }
309188 }
310189
311190 /// Returns an iterator that yields the literal escape code of a character
@@ -345,7 +224,15 @@ impl char {
345224 #[ stable( feature = "char_escape_debug" , since = "1.20.0" ) ]
346225 #[ inline]
347226 pub fn escape_debug ( self ) -> EscapeDebug {
348- C :: escape_debug ( self )
227+ let init_state = match self {
228+ '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
229+ '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
230+ '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
231+ '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
232+ c if is_printable ( c) => EscapeDefaultState :: Char ( c) ,
233+ c => EscapeDefaultState :: Unicode ( c. escape_unicode ( ) ) ,
234+ } ;
235+ EscapeDebug ( EscapeDefault { state : init_state } )
349236 }
350237
351238 /// Returns an iterator that yields the literal escape code of a character
@@ -400,7 +287,15 @@ impl char {
400287 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
401288 #[ inline]
402289 pub fn escape_default ( self ) -> EscapeDefault {
403- C :: escape_default ( self )
290+ let init_state = match self {
291+ '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
292+ '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
293+ '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
294+ '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
295+ '\x20' ... '\x7e' => EscapeDefaultState :: Char ( self ) ,
296+ _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) )
297+ } ;
298+ EscapeDefault { state : init_state }
404299 }
405300
406301 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
@@ -451,7 +346,16 @@ impl char {
451346 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
452347 #[ inline]
453348 pub fn len_utf8 ( self ) -> usize {
454- C :: len_utf8 ( self )
349+ let code = self as u32 ;
350+ if code < MAX_ONE_B {
351+ 1
352+ } else if code < MAX_TWO_B {
353+ 2
354+ } else if code < MAX_THREE_B {
355+ 3
356+ } else {
357+ 4
358+ }
455359 }
456360
457361 /// Returns the number of 16-bit code units this `char` would need if
@@ -476,7 +380,8 @@ impl char {
476380 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
477381 #[ inline]
478382 pub fn len_utf16 ( self ) -> usize {
479- C :: len_utf16 ( self )
383+ let ch = self as u32 ;
384+ if ( ch & 0xFFFF ) == ch { 1 } else { 2 }
480385 }
481386
482387 /// Encodes this character as UTF-8 into the provided byte buffer,
@@ -518,7 +423,35 @@ impl char {
518423 #[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
519424 #[ inline]
520425 pub fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
521- C :: encode_utf8 ( self , dst)
426+ let code = self as u32 ;
427+ unsafe {
428+ let len =
429+ if code < MAX_ONE_B && !dst. is_empty ( ) {
430+ * dst. get_unchecked_mut ( 0 ) = code as u8 ;
431+ 1
432+ } else if code < MAX_TWO_B && dst. len ( ) >= 2 {
433+ * dst. get_unchecked_mut ( 0 ) = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
434+ * dst. get_unchecked_mut ( 1 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
435+ 2
436+ } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
437+ * dst. get_unchecked_mut ( 0 ) = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
438+ * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
439+ * dst. get_unchecked_mut ( 2 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
440+ 3
441+ } else if dst. len ( ) >= 4 {
442+ * dst. get_unchecked_mut ( 0 ) = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
443+ * dst. get_unchecked_mut ( 1 ) = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
444+ * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
445+ * dst. get_unchecked_mut ( 3 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
446+ 4
447+ } else {
448+ panic ! ( "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
449+ from_u32_unchecked( code) . len_utf8( ) ,
450+ code,
451+ dst. len( ) )
452+ } ;
453+ from_utf8_unchecked_mut ( dst. get_unchecked_mut ( ..len) )
454+ }
522455 }
523456
524457 /// Encodes this character as UTF-16 into the provided `u16` buffer,
@@ -558,7 +491,25 @@ impl char {
558491 #[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
559492 #[ inline]
560493 pub fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
561- C :: encode_utf16 ( self , dst)
494+ let mut code = self as u32 ;
495+ unsafe {
496+ if ( code & 0xFFFF ) == code && !dst. is_empty ( ) {
497+ // The BMP falls through (assuming non-surrogate, as it should)
498+ * dst. get_unchecked_mut ( 0 ) = code as u16 ;
499+ slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 1 )
500+ } else if dst. len ( ) >= 2 {
501+ // Supplementary planes break into surrogates.
502+ code -= 0x1_0000 ;
503+ * dst. get_unchecked_mut ( 0 ) = 0xD800 | ( ( code >> 10 ) as u16 ) ;
504+ * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
505+ slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
506+ } else {
507+ panic ! ( "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
508+ from_u32_unchecked( code) . len_utf16( ) ,
509+ code,
510+ dst. len( ) )
511+ }
512+ }
562513 }
563514
564515 /// Returns true if this `char` is an alphabetic code point, and false if not.
0 commit comments