@@ -86,14 +86,16 @@ where
8686 let res = unescape_char_or_byte ( & mut chars, mode == Mode :: Byte ) ;
8787 callback ( 0 ..( src. len ( ) - chars. as_str ( ) . len ( ) ) , res) ;
8888 }
89- Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( src, mode == Mode :: ByteStr , callback) ,
89+ Mode :: Str | Mode :: ByteStr => unescape_str_common ( src, mode, callback) ,
90+
9091 Mode :: RawStr | Mode :: RawByteStr => {
9192 unescape_raw_str_or_raw_byte_str ( src, mode == Mode :: RawByteStr , callback)
9293 }
9394 Mode :: CStr | Mode :: RawCStr => unreachable ! ( ) ,
9495 }
9596}
9697
98+ /// A unit within CStr. Must not be a nul character.
9799pub enum CStrUnit {
98100 Byte ( u8 ) ,
99101 Char ( char ) ,
@@ -164,24 +166,52 @@ impl Mode {
164166 }
165167 }
166168
167- pub fn is_byte ( self ) -> bool {
169+ /// Returns whether a `\xXX` escape in this kind of literal must stay within the ASCII range.
/// This is true for `Char`, `Str` and `RawStr`, and false for the byte and C string modes.
/// When it is true, `scan_escape` rejects a non-ASCII hex value with
/// `EscapeError::OutOfRangeHexEscape`.
170+ pub fn ascii_escapes_should_be_ascii ( self ) -> bool {
171+ match self {
172+ Mode :: Char | Mode :: Str | Mode :: RawStr => true ,
173+ Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr | Mode :: CStr | Mode :: RawCStr => false ,
174+ }
175+ }
176+
177+ /// Returns whether unescaped characters within the literal must be within the ASCII range.
/// This is true for `Byte`, `ByteStr` and `RawByteStr`, and false for every other mode,
/// including the C string modes. `unescape_str_common` passes the result to `ascii_check`,
/// which reports `EscapeError::NonAsciiCharInByte` for a non-ASCII character when it is set.
178+ pub fn characters_should_be_ascii ( self ) -> bool {
179+ match self {
180+ Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr => true ,
181+ Mode :: Char | Mode :: Str | Mode :: RawStr | Mode :: CStr | Mode :: RawCStr => false ,
182+ }
183+ }
184+
185+ /// Returns whether `\u{...}` escapes are rejected in this kind of literal.
/// This is true for `Byte`, `ByteStr` and `RawByteStr`, and false for the character, string
/// and C string modes. `scan_escape` forwards the result to `scan_unicode`, which returns
/// `EscapeError::UnicodeEscapeInByte` only after the escape syntax itself has been checked.
186+ pub fn is_unicode_escape_disallowed ( self ) -> bool {
168187 match self {
169- Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr | Mode :: CStr | Mode :: RawCStr => true ,
170- Mode :: Char | Mode :: Str | Mode :: RawStr => false ,
188+ Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr => true ,
189+ Mode :: Char | Mode :: Str | Mode :: RawStr | Mode :: CStr | Mode :: RawCStr => false ,
190+ }
191+ }
192+
/// Returns the literal prefix for this mode, ignoring whether the mode is raw.
/// Raw and non-raw variants map to the same string: `b` for the byte modes, `c` for the
/// C string modes, and the empty string for `Char`, `Str` and `RawStr`.
193+ pub fn prefix_noraw ( self ) -> & ' static str {
194+ match self {
195+ Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr => "b" ,
196+ Mode :: CStr | Mode :: RawCStr => "c" ,
197+ Mode :: Char | Mode :: Str | Mode :: RawStr => "" ,
171198 }
172199 }
173200}
174201
175- fn scan_escape ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
202+ fn scan_escape < T : From < u8 > + From < char > > (
203+ chars : & mut Chars < ' _ > ,
204+ mode : Mode ,
205+ ) -> Result < T , EscapeError > {
176206 // Previous character was '\\', unescape what follows.
177207 let res = match chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ? {
178- '"' => '"' ,
179- 'n' => '\n' ,
180- 'r' => '\r' ,
181- 't' => '\t' ,
182- '\\' => '\\' ,
183- '\'' => '\'' ,
184- '0' => '\0' ,
208+ '"' => b '"',
209+ 'n' => b '\n',
210+ 'r' => b '\r',
211+ 't' => b '\t',
212+ '\\' => b '\\',
213+ '\'' => b '\'',
214+ '0' => b '\0',
185215
186216 'x' => {
187217 // Parse hexadecimal character code.
@@ -194,22 +224,23 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
194224
195225 let value = hi * 16 + lo;
196226
197- // For a non-byte literal verify that it is within ASCII range.
198- if !is_byte && !is_ascii ( value) {
227+ if mode. ascii_escapes_should_be_ascii ( ) && !is_ascii ( value) {
199228 return Err ( EscapeError :: OutOfRangeHexEscape ) ;
200229 }
201- let value = value as u8 ;
202230
203- value as char
231+ value as u8
204232 }
205233
206- 'u' => scan_unicode ( chars, is_byte ) ? ,
234+ 'u' => return scan_unicode ( chars, mode . is_unicode_escape_disallowed ( ) ) . map ( Into :: into ) ,
207235 _ => return Err ( EscapeError :: InvalidEscape ) ,
208236 } ;
209- Ok ( res)
237+ Ok ( res. into ( ) )
210238}
211239
212- fn scan_unicode ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
240+ fn scan_unicode (
241+ chars : & mut Chars < ' _ > ,
242+ is_unicode_escape_disallowed : bool ,
243+ ) -> Result < char , EscapeError > {
213244 // We've parsed '\u', now we have to parse '{..}'.
214245
215246 if chars. next ( ) != Some ( '{' ) {
@@ -237,7 +268,7 @@ fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeErro
237268
238269 // Incorrect syntax has higher priority for error reporting
239270 // than a disallowed value for a literal.
240- if is_byte {
271+ if is_unicode_escape_disallowed {
241272 return Err ( EscapeError :: UnicodeEscapeInByte ) ;
242273 }
243274
@@ -263,8 +294,8 @@ fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeErro
263294}
264295
265296#[ inline]
266- fn ascii_check ( c : char , is_byte : bool ) -> Result < char , EscapeError > {
267- if is_byte && !c. is_ascii ( ) {
297+ fn ascii_check ( c : char , characters_should_be_ascii : bool ) -> Result < char , EscapeError > {
298+ if characters_should_be_ascii && !c. is_ascii ( ) {
268299 // Byte literal can't be a non-ascii character.
269300 Err ( EscapeError :: NonAsciiCharInByte )
270301 } else {
@@ -275,7 +306,7 @@ fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
275306fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
276307 let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
277308 let res = match c {
278- '\\' => scan_escape ( chars, is_byte) ,
309+ '\\' => scan_escape ( chars, if is_byte { Mode :: Byte } else { Mode :: Char } ) ,
279310 '\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
280311 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
281312 _ => ascii_check ( c, is_byte) ,
@@ -288,9 +319,9 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, E
288319
289320/// Takes the contents of a string literal (without quotes) and produces a
290321/// sequence of escaped characters or errors.
291- fn unescape_str_or_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
322+ fn unescape_str_common < F , T : From < u8 > + From < char > > ( src : & str , mode : Mode , callback : & mut F )
292323where
293- F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
324+ F : FnMut ( Range < usize > , Result < T , EscapeError > ) ,
294325{
295326 let mut chars = src. chars ( ) ;
296327
@@ -312,17 +343,17 @@ where
312343 } ) ;
313344 continue ;
314345 }
315- _ => scan_escape ( & mut chars, is_byte ) ,
346+ _ => scan_escape :: < T > ( & mut chars, mode ) ,
316347 }
317348 }
318- '\n' => Ok ( '\n' ) ,
319- '\t' => Ok ( '\t' ) ,
349+ '\n' => Ok ( b '\n'. into ( ) ) ,
350+ '\t' => Ok ( b '\t'. into ( ) ) ,
320351 '"' => Err ( EscapeError :: EscapeOnlyChar ) ,
321352 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
322- _ => ascii_check ( c, is_byte ) ,
353+ _ => ascii_check ( c, mode . characters_should_be_ascii ( ) ) . map ( Into :: into ) ,
323354 } ;
324355 let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
325- callback ( start..end, res) ;
356+ callback ( start..end, res. map ( Into :: into ) ) ;
326357 }
327358}
328359
0 commit comments