@@ -29,7 +29,7 @@ mod tests;
2929use self :: LiteralKind :: * ;
3030use self :: TokenKind :: * ;
3131use crate :: cursor:: { Cursor , EOF_CHAR } ;
32- use std:: convert:: TryInto ;
32+ use std:: convert:: TryFrom ;
3333
3434/// Parsed token.
3535/// It doesn't contain information about data that has been parsed,
@@ -142,84 +142,24 @@ pub enum LiteralKind {
142142 /// "b"abc"", "b"abc"
143143 ByteStr { terminated : bool } ,
144144 /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
145- RawStr ( UnvalidatedRawStr ) ,
145+ RawStr { n_hashes : u16 , err : Option < RawStrError > } ,
146146 /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
147- RawByteStr ( UnvalidatedRawStr ) ,
148- }
149-
150- /// Represents something that looks like a raw string, but may have some
151- /// problems. Use `.validate()` to convert it into something
152- /// usable.
153- #[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
154- pub struct UnvalidatedRawStr {
155- /// The prefix (`r###"`) is valid
156- valid_start : bool ,
157-
158- /// The postfix (`"###`) is valid
159- valid_end : bool ,
160-
161- /// The number of leading `#`
162- n_start_hashes : usize ,
163- /// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes`
164- n_end_hashes : usize ,
165- /// The offset starting at `r` or `br` where the user may have intended to end the string.
166- /// Currently, it is the longest sequence of pattern `"#+"`.
167- possible_terminator_offset : Option < usize > ,
147+ RawByteStr { n_hashes : u16 , err : Option < RawStrError > } ,
168148}
169149
170150/// Error produced validating a raw string. Represents cases like:
171- /// - `r##~"abcde"##`: `LexRawStrError:: InvalidStarter`
172- /// - `r###"abcde"##`: `LexRawStrError:: NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
173- /// - Too many `#`s (>65536 ): `TooManyDelimiters`
151+ /// - `r##~"abcde"##`: `InvalidStarter { bad_char: '~' }`
152+ /// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11) }`
153+ /// - Too many `#`s (>65535), e.g. 65536 of them: `TooManyDelimiters { found: 65536 }`
174154#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
175- pub enum LexRawStrError {
155+ pub enum RawStrError {
176156 /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
177- InvalidStarter ,
157+ InvalidStarter { bad_char : char } ,
178158 /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
179159 /// may have intended to terminate it.
180160 NoTerminator { expected : usize , found : usize , possible_terminator_offset : Option < usize > } ,
181- /// More than 65536 `#`s exist.
182- TooManyDelimiters ,
183- }
184-
185- /// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where
186- /// there are a matching number of `#` characters in both. Note that this will
187- /// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a
188- /// `ValidatedRawString { n_hashes: 3 }` followed by a `#` token.
189- #[ derive( Debug , Eq , PartialEq , Copy , Clone ) ]
190- pub struct ValidatedRawStr {
191- n_hashes : u16 ,
192- }
193-
194- impl ValidatedRawStr {
195- pub fn num_hashes ( & self ) -> u16 {
196- self . n_hashes
197- }
198- }
199-
200- impl UnvalidatedRawStr {
201- pub fn validate ( self ) -> Result < ValidatedRawStr , LexRawStrError > {
202- if !self . valid_start {
203- return Err ( LexRawStrError :: InvalidStarter ) ;
204- }
205-
206- // Only up to 65535 `#`s are allowed in raw strings
207- let n_start_safe: u16 =
208- self . n_start_hashes . try_into ( ) . map_err ( |_| LexRawStrError :: TooManyDelimiters ) ?;
209-
210- if self . n_start_hashes > self . n_end_hashes || !self . valid_end {
211- Err ( LexRawStrError :: NoTerminator {
212- expected : self . n_start_hashes ,
213- found : self . n_end_hashes ,
214- possible_terminator_offset : self . possible_terminator_offset ,
215- } )
216- } else {
217- // Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end,
218- // they must be equal.
219- debug_assert_eq ! ( self . n_start_hashes, self . n_end_hashes) ;
220- Ok ( ValidatedRawStr { n_hashes : n_start_safe } )
221- }
222- }
161+ /// More than 65535 `#`s exist. `found` is the number of `#`s that were counted.
162+ TooManyDelimiters { found : usize } ,
223163}
224164
225165/// Base of numeric literal encoding according to its prefix.
@@ -354,12 +294,12 @@ impl Cursor<'_> {
354294 'r' => match ( self . first ( ) , self . second ( ) ) {
355295 ( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
356296 ( '#' , _) | ( '"' , _) => {
357- let raw_str_i = self . raw_double_quoted_string ( 1 ) ;
297+ let ( n_hashes , err ) = self . raw_double_quoted_string ( 1 ) ;
358298 let suffix_start = self . len_consumed ( ) ;
359- if raw_str_i . n_end_hashes == raw_str_i . n_start_hashes {
299+ if err . is_none ( ) {
360300 self . eat_literal_suffix ( ) ;
361301 }
362- let kind = RawStr ( raw_str_i ) ;
302+ let kind = RawStr { n_hashes , err } ;
363303 Literal { kind, suffix_start }
364304 }
365305 _ => self . ident ( ) ,
@@ -389,14 +329,12 @@ impl Cursor<'_> {
389329 }
390330 ( 'r' , '"' ) | ( 'r' , '#' ) => {
391331 self . bump ( ) ;
392- let raw_str_i = self . raw_double_quoted_string ( 2 ) ;
332+ let ( n_hashes , err ) = self . raw_double_quoted_string ( 2 ) ;
393333 let suffix_start = self . len_consumed ( ) ;
394- let terminated = raw_str_i. n_start_hashes == raw_str_i. n_end_hashes ;
395- if terminated {
334+ if err. is_none ( ) {
396335 self . eat_literal_suffix ( ) ;
397336 }
398-
399- let kind = RawByteStr ( raw_str_i) ;
337+ let kind = RawByteStr { n_hashes, err } ;
400338 Literal { kind, suffix_start }
401339 }
402340 _ => self . ident ( ) ,
@@ -692,27 +630,34 @@ impl Cursor<'_> {
692630 false
693631 }
694632
695- /// Eats the double-quoted string and returns an `UnvalidatedRawStr`.
696- fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> UnvalidatedRawStr {
633+ /// Eats the double-quoted raw string and returns its `#` count as a `u16`, plus an error if one was encountered.
634+ fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> ( u16 , Option < RawStrError > ) {
635+ // Delegate the scanning to `raw_string_unvalidated`, which reports the hash count as a `usize`.
636+ // The too-many-hashes check is applied only afterwards; this way, it eats the whole raw string.
637+ let ( n_hashes, err) = self . raw_string_unvalidated ( prefix_len) ;
638+ // Only up to 65535 `#`s are allowed in raw strings, so the count must fit in a `u16`.
639+ match u16:: try_from ( n_hashes) {
640+ Ok ( num) => ( num, err) ,
641+ // The count overflows `u16`: report 0 hashes, carry the real count in `found`, and drop any inner error.
642+ Err ( _) => ( 0 , Some ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ) ,
643+ }
644+ }
645+
646+ fn raw_string_unvalidated ( & mut self , prefix_len : usize ) -> ( usize , Option < RawStrError > ) {
697647 debug_assert ! ( self . prev( ) == 'r' ) ;
698- let mut valid_start: bool = false ;
699648 let start_pos = self . len_consumed ( ) ;
700- let ( mut possible_terminator_offset, mut max_hashes) = ( None , 0 ) ;
649+ let mut possible_terminator_offset = None ;
650+ let mut max_hashes = 0 ;
701651
702652 // Count opening '#' symbols.
703653 let n_start_hashes = self . eat_while ( |c| c == '#' ) ;
704654
705655 // Check that string is started.
706656 match self . bump ( ) {
707- Some ( '"' ) => valid_start = true ,
708- _ => {
709- return UnvalidatedRawStr {
710- valid_start,
711- valid_end : false ,
712- n_start_hashes,
713- n_end_hashes : 0 ,
714- possible_terminator_offset,
715- } ;
657+ Some ( '"' ) => ( ) ,
658+ c => {
659+ let c = c. unwrap_or ( EOF_CHAR ) ;
660+ return ( n_start_hashes, Some ( RawStrError :: InvalidStarter { bad_char : c } ) ) ;
716661 }
717662 }
718663
@@ -722,13 +667,14 @@ impl Cursor<'_> {
722667 self . eat_while ( |c| c != '"' ) ;
723668
724669 if self . is_eof ( ) {
725- return UnvalidatedRawStr {
726- valid_start,
727- valid_end : false ,
670+ return (
728671 n_start_hashes,
729- n_end_hashes : max_hashes,
730- possible_terminator_offset,
731- } ;
672+ Some ( RawStrError :: NoTerminator {
673+ expected : n_start_hashes,
674+ found : max_hashes,
675+ possible_terminator_offset,
676+ } ) ,
677+ ) ;
732678 }
733679
734680 // Eat closing double quote.
@@ -737,7 +683,7 @@ impl Cursor<'_> {
737683 // Check that amount of closing '#' symbols
738684 // is equal to the amount of opening ones.
739685 // Note that this will not consume extra trailing `#` characters:
740- // `r###"abcde"####` is lexed as a `LexedRawString { n_hashes: 3 }`
686+ // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
741687 // followed by a `#` token.
742688 let mut hashes_left = n_start_hashes;
743689 let is_closing_hash = |c| {
@@ -751,13 +697,7 @@ impl Cursor<'_> {
751697 let n_end_hashes = self . eat_while ( is_closing_hash) ;
752698
753699 if n_end_hashes == n_start_hashes {
754- return UnvalidatedRawStr {
755- valid_start,
756- valid_end : true ,
757- n_start_hashes,
758- n_end_hashes,
759- possible_terminator_offset : None ,
760- } ;
700+ return ( n_start_hashes, None ) ;
761701 } else if n_end_hashes > max_hashes {
762702 // Keep track of possible terminators to give a hint about
763703 // where there might be a missing terminator
0 commit comments