@@ -38,18 +38,17 @@ use std::convert::TryFrom;
3838#[ derive( Debug ) ]
3939pub struct Token {
4040 pub kind : TokenKind ,
41- pub len : usize ,
41+ pub len : u32 ,
4242}
4343
4444impl Token {
45- fn new ( kind : TokenKind , len : usize ) -> Token {
45+ fn new ( kind : TokenKind , len : u32 ) -> Token {
4646 Token { kind, len }
4747 }
4848}
4949
5050/// Enum representing common lexeme types.
51- // perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
52- #[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
51+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
5352pub enum TokenKind {
5453 // Multi-char tokens:
5554 /// "// comment"
@@ -76,7 +75,7 @@ pub enum TokenKind {
7675 /// tokens.
7776 UnknownPrefix ,
7877 /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
79- Literal { kind : LiteralKind , suffix_start : usize } ,
78+ Literal { kind : LiteralKind , suffix_start : u32 } ,
8079 /// "'a"
8180 Lifetime { starts_with_number : bool } ,
8281
@@ -160,26 +159,24 @@ pub enum LiteralKind {
160159 Str { terminated : bool } ,
161160 /// "b"abc"", "b"abc"
162161 ByteStr { terminated : bool } ,
163- /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
164- RawStr { n_hashes : u8 , err : Option < RawStrError > } ,
165- /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
166- RawByteStr { n_hashes : u8 , err : Option < RawStrError > } ,
162+ /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
163+ /// an invalid literal.
164+ RawStr { n_hashes : Option < u8 > } ,
165+ /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
166+ /// indicates an invalid literal.
167+ RawByteStr { n_hashes : Option < u8 > } ,
167168}
168169
/// Error produced validating a raw string.
///
/// Each variant documents a representative malformed literal.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum RawStrError {
    /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
    InvalidStarter { bad_char: char },
    /// The string was not terminated, e.g. `r###"abcde"##`.
    /// `possible_terminator_offset` is the number of characters after `r` or
    /// `br` where they may have intended to terminate it.
    NoTerminator { expected: u32, found: u32, possible_terminator_offset: Option<u32> },
    /// More than 255 `#`s exist.
    TooManyDelimiters { found: u32 },
}
184181
185182/// Base of numeric literal encoding according to its prefix.
@@ -221,11 +218,25 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
221218}
222219
223220/// Parses the first token from the provided input string.
221+ #[ inline]
224222pub fn first_token ( input : & str ) -> Token {
225223 debug_assert ! ( !input. is_empty( ) ) ;
226224 Cursor :: new ( input) . advance_token ( )
227225}
228226
227+ /// Validates a raw string literal. Used for getting more information about a
228+ /// problem with a `RawStr`/`RawByteStr` with a `None` field.
229+ #[ inline]
230+ pub fn validate_raw_str ( input : & str , prefix_len : u32 ) -> Result < ( ) , RawStrError > {
231+ debug_assert ! ( !input. is_empty( ) ) ;
232+ let mut cursor = Cursor :: new ( input) ;
233+ // Move past the leading `r` or `br`.
234+ for _ in 0 ..prefix_len {
235+ cursor. bump ( ) . unwrap ( ) ;
236+ }
237+ cursor. raw_double_quoted_string ( prefix_len) . map ( |_| ( ) )
238+ }
239+
229240/// Creates an iterator that produces tokens from the input string.
230241pub fn tokenize ( input : & str ) -> impl Iterator < Item = Token > + ' _ {
231242 let mut cursor = Cursor :: new ( input) ;
@@ -315,12 +326,12 @@ impl Cursor<'_> {
315326 'r' => match ( self . first ( ) , self . second ( ) ) {
316327 ( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
317328 ( '#' , _) | ( '"' , _) => {
318- let ( n_hashes , err ) = self . raw_double_quoted_string ( 1 ) ;
329+ let res = self . raw_double_quoted_string ( 1 ) ;
319330 let suffix_start = self . len_consumed ( ) ;
320- if err . is_none ( ) {
331+ if res . is_ok ( ) {
321332 self . eat_literal_suffix ( ) ;
322333 }
323- let kind = RawStr { n_hashes, err } ;
334+ let kind = RawStr { n_hashes : res . ok ( ) } ;
324335 Literal { kind, suffix_start }
325336 }
326337 _ => self . ident_or_unknown_prefix ( ) ,
@@ -350,12 +361,12 @@ impl Cursor<'_> {
350361 }
351362 ( 'r' , '"' ) | ( 'r' , '#' ) => {
352363 self . bump ( ) ;
353- let ( n_hashes , err ) = self . raw_double_quoted_string ( 2 ) ;
364+ let res = self . raw_double_quoted_string ( 2 ) ;
354365 let suffix_start = self . len_consumed ( ) ;
355- if err . is_none ( ) {
366+ if res . is_ok ( ) {
356367 self . eat_literal_suffix ( ) ;
357368 }
358- let kind = RawByteStr { n_hashes, err } ;
369+ let kind = RawByteStr { n_hashes : res . ok ( ) } ;
359370 Literal { kind, suffix_start }
360371 }
361372 _ => self . ident_or_unknown_prefix ( ) ,
@@ -698,19 +709,18 @@ impl Cursor<'_> {
698709 }
699710
700711 /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
701- fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> ( u8 , Option < RawStrError > ) {
712+ fn raw_double_quoted_string ( & mut self , prefix_len : u32 ) -> Result < u8 , RawStrError > {
702713 // Wrap the actual function to handle the error with too many hashes.
703714 // This way, it eats the whole raw string.
704- let ( n_hashes, err ) = self . raw_string_unvalidated ( prefix_len) ;
715+ let n_hashes = self . raw_string_unvalidated ( prefix_len) ? ;
705716 // Only up to 255 `#`s are allowed in raw strings
706717 match u8:: try_from ( n_hashes) {
707- Ok ( num) => ( num, err) ,
708- // We lie about the number of hashes here :P
709- Err ( _) => ( 0 , Some ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ) ,
718+ Ok ( num) => Ok ( num) ,
719+ Err ( _) => Err ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ,
710720 }
711721 }
712722
713- fn raw_string_unvalidated ( & mut self , prefix_len : usize ) -> ( usize , Option < RawStrError > ) {
723+ fn raw_string_unvalidated ( & mut self , prefix_len : u32 ) -> Result < u32 , RawStrError > {
714724 debug_assert ! ( self . prev( ) == 'r' ) ;
715725 let start_pos = self . len_consumed ( ) ;
716726 let mut possible_terminator_offset = None ;
@@ -729,7 +739,7 @@ impl Cursor<'_> {
729739 Some ( '"' ) => ( ) ,
730740 c => {
731741 let c = c. unwrap_or ( EOF_CHAR ) ;
732- return ( n_start_hashes , Some ( RawStrError :: InvalidStarter { bad_char : c } ) ) ;
742+ return Err ( RawStrError :: InvalidStarter { bad_char : c } ) ;
733743 }
734744 }
735745
@@ -739,14 +749,11 @@ impl Cursor<'_> {
739749 self . eat_while ( |c| c != '"' ) ;
740750
741751 if self . is_eof ( ) {
742- return (
743- n_start_hashes,
744- Some ( RawStrError :: NoTerminator {
745- expected : n_start_hashes,
746- found : max_hashes,
747- possible_terminator_offset,
748- } ) ,
749- ) ;
752+ return Err ( RawStrError :: NoTerminator {
753+ expected : n_start_hashes,
754+ found : max_hashes,
755+ possible_terminator_offset,
756+ } ) ;
750757 }
751758
752759 // Eat closing double quote.
@@ -764,7 +771,7 @@ impl Cursor<'_> {
764771 }
765772
766773 if n_end_hashes == n_start_hashes {
767- return ( n_start_hashes, None ) ;
774+ return Ok ( n_start_hashes) ;
768775 } else if n_end_hashes > max_hashes {
769776 // Keep track of possible terminators to give a hint about
770777 // where there might be a missing terminator
0 commit comments