@@ -38,18 +38,17 @@ use std::convert::TryFrom;
3838#[ derive( Debug ) ]
3939pub struct Token {
4040 pub kind : TokenKind ,
41- pub len : usize ,
41+ pub len : u32 ,
4242}
4343
4444impl Token {
45- fn new ( kind : TokenKind , len : usize ) -> Token {
45+ fn new ( kind : TokenKind , len : u32 ) -> Token {
4646 Token { kind, len }
4747 }
4848}
4949
5050/// Enum representing common lexeme types.
51- // perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
52- #[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
51+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
5352pub enum TokenKind {
5453 // Multi-char tokens:
5554 /// "// comment"
@@ -76,7 +75,7 @@ pub enum TokenKind {
7675 /// tokens.
7776 UnknownPrefix ,
7877 /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
79- Literal { kind : LiteralKind , suffix_start : usize } ,
78+ Literal { kind : LiteralKind , suffix_start : u32 } ,
8079 /// "'a"
8180 Lifetime { starts_with_number : bool } ,
8281
@@ -160,26 +159,24 @@ pub enum LiteralKind {
160159 Str { terminated : bool } ,
161160 /// "b"abc"", "b"abc"
162161 ByteStr { terminated : bool } ,
163- /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
164- RawStr { n_hashes : u8 , err : Option < RawStrError > } ,
165- /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
166- RawByteStr { n_hashes : u8 , err : Option < RawStrError > } ,
162+ /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
163+ /// an invalid literal.
164+ RawStr { n_hashes : Option < u8 > } ,
165+ /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
166+ /// indicates an invalid literal.
167+ RawByteStr { n_hashes : Option < u8 > } ,
167168}
168169
169- /// Error produced validating a raw string. Represents cases like:
170- /// - `r##~"abcde"##`: `InvalidStarter`
171- /// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
172- /// - Too many `#`s (>255): `TooManyDelimiters`
173- // perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
174170#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
175171pub enum RawStrError {
176- /// Non `#` characters exist between `r` and `"` eg. `r#~".. `
172+ /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`.
177173 InvalidStarter { bad_char : char } ,
178- /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
179- /// may have intended to terminate it.
180- NoTerminator { expected : usize , found : usize , possible_terminator_offset : Option < usize > } ,
174+ /// The string was not terminated, e.g. `r###"abcde"##`.
175+ /// `possible_terminator_offset` is the number of characters after `r` or
176+ /// `br` where the author may have intended to terminate it.
177+ NoTerminator { expected : u32 , found : u32 , possible_terminator_offset : Option < u32 > } ,
181178 /// More than 255 `#`s exist.
182- TooManyDelimiters { found : usize } ,
179+ TooManyDelimiters { found : u32 } ,
183180}
184181
185182/// Base of numeric literal encoding according to its prefix.
@@ -227,6 +224,19 @@ pub fn first_token(input: &str) -> Token {
227224 Cursor :: new ( input) . advance_token ( )
228225}
229226
227+ /// Validates a raw string literal. Used for getting more information about a
228+ /// problem with a `RawStr`/`RawByteStr` whose `n_hashes` field is `None`. `input` is expected to begin with the `r`/`br` prefix, which is skipped with `prefix_len` calls to `bump`.
229+ #[ inline]
230+ pub fn validate_raw_str ( input : & str , prefix_len : u32 ) -> Result < ( ) , RawStrError > {
231+ debug_assert ! ( !input. is_empty( ) ) ;
232+ let mut cursor = Cursor :: new ( input) ;
233+ // Move past the leading `r` or `br`. NOTE(review): the `unwrap` presumably panics if `input` ends inside the prefix -- confirm against `Cursor::bump`.
234+ for _ in 0 ..prefix_len {
235+ cursor. bump ( ) . unwrap ( ) ;
236+ }
237+ cursor. raw_double_quoted_string ( prefix_len) . map ( |_| ( ) )
238+ }
239+
230240/// Creates an iterator that produces tokens from the input string.
231241pub fn tokenize ( input : & str ) -> impl Iterator < Item = Token > + ' _ {
232242 let mut cursor = Cursor :: new ( input) ;
@@ -316,12 +326,12 @@ impl Cursor<'_> {
316326 'r' => match ( self . first ( ) , self . second ( ) ) {
317327 ( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
318328 ( '#' , _) | ( '"' , _) => {
319- let ( n_hashes , err ) = self . raw_double_quoted_string ( 1 ) ;
329+ let res = self . raw_double_quoted_string ( 1 ) ;
320330 let suffix_start = self . len_consumed ( ) ;
321- if err . is_none ( ) {
331+ if res . is_ok ( ) {
322332 self . eat_literal_suffix ( ) ;
323333 }
324- let kind = RawStr { n_hashes, err } ;
334+ let kind = RawStr { n_hashes : res . ok ( ) } ;
325335 Literal { kind, suffix_start }
326336 }
327337 _ => self . ident_or_unknown_prefix ( ) ,
@@ -351,12 +361,12 @@ impl Cursor<'_> {
351361 }
352362 ( 'r' , '"' ) | ( 'r' , '#' ) => {
353363 self . bump ( ) ;
354- let ( n_hashes , err ) = self . raw_double_quoted_string ( 2 ) ;
364+ let res = self . raw_double_quoted_string ( 2 ) ;
355365 let suffix_start = self . len_consumed ( ) ;
356- if err . is_none ( ) {
366+ if res . is_ok ( ) {
357367 self . eat_literal_suffix ( ) ;
358368 }
359- let kind = RawByteStr { n_hashes, err } ;
369+ let kind = RawByteStr { n_hashes : res . ok ( ) } ;
360370 Literal { kind, suffix_start }
361371 }
362372 _ => self . ident_or_unknown_prefix ( ) ,
@@ -699,19 +709,18 @@ impl Cursor<'_> {
699709 }
700710
701711 /// Eats the double-quoted string and returns `n_hashes` on success, or the `RawStrError` that was encountered.
702- fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> ( u8 , Option < RawStrError > ) {
712+ fn raw_double_quoted_string ( & mut self , prefix_len : u32 ) -> Result < u8 , RawStrError > {
703713 // Wrap the actual function to handle the error with too many hashes.
704714 // This way, it eats the whole raw string.
705- let ( n_hashes, err ) = self . raw_string_unvalidated ( prefix_len) ;
715+ let n_hashes = self . raw_string_unvalidated ( prefix_len) ? ;
706716 // Only up to 255 `#`s are allowed in raw strings.
707717 match u8:: try_from ( n_hashes) {
708- Ok ( num) => ( num, err) ,
709- // We lie about the number of hashes here :P
710- Err ( _) => ( 0 , Some ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ) ,
718+ Ok ( num) => Ok ( num) ,
719+ Err ( _) => Err ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ,
711720 }
712721 }
713722
714- fn raw_string_unvalidated ( & mut self , prefix_len : usize ) -> ( usize , Option < RawStrError > ) {
723+ fn raw_string_unvalidated ( & mut self , prefix_len : u32 ) -> Result < u32 , RawStrError > {
715724 debug_assert ! ( self . prev( ) == 'r' ) ;
716725 let start_pos = self . len_consumed ( ) ;
717726 let mut possible_terminator_offset = None ;
@@ -730,7 +739,7 @@ impl Cursor<'_> {
730739 Some ( '"' ) => ( ) ,
731740 c => {
732741 let c = c. unwrap_or ( EOF_CHAR ) ;
733- return ( n_start_hashes , Some ( RawStrError :: InvalidStarter { bad_char : c } ) ) ;
742+ return Err ( RawStrError :: InvalidStarter { bad_char : c } ) ;
734743 }
735744 }
736745
@@ -740,14 +749,11 @@ impl Cursor<'_> {
740749 self . eat_while ( |c| c != '"' ) ;
741750
742751 if self . is_eof ( ) {
743- return (
744- n_start_hashes,
745- Some ( RawStrError :: NoTerminator {
746- expected : n_start_hashes,
747- found : max_hashes,
748- possible_terminator_offset,
749- } ) ,
750- ) ;
752+ return Err ( RawStrError :: NoTerminator {
753+ expected : n_start_hashes,
754+ found : max_hashes,
755+ possible_terminator_offset,
756+ } ) ;
751757 }
752758
753759 // Eat closing double quote.
@@ -765,7 +771,7 @@ impl Cursor<'_> {
765771 }
766772
767773 if n_end_hashes == n_start_hashes {
768- return ( n_start_hashes, None ) ;
774+ return Ok ( n_start_hashes) ;
769775 } else if n_end_hashes > max_hashes {
770776 // Keep track of possible terminators to give a hint about
771777 // where there might be a missing terminator
0 commit comments