1717mod cursor;
1818pub mod unescape;
1919
20+ #[ cfg( test) ]
21+ mod tests;
22+
2023use self :: LiteralKind :: * ;
2124use self :: TokenKind :: * ;
2225use crate :: cursor:: { Cursor , EOF_CHAR } ;
26+ use std:: convert:: TryInto ;
2327
2428/// Parsed token.
2529/// It doesn't contain information about data that has been parsed,
@@ -132,9 +136,65 @@ pub enum LiteralKind {
132136 /// "b"abc"", "b"abc"
133137 ByteStr { terminated : bool } ,
134138 /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
135- RawStr { n_hashes : usize , started : bool , terminated : bool } ,
139+ RawStr ( UnvalidatedRawStr ) ,
136140 /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
137- RawByteStr { n_hashes : usize , started : bool , terminated : bool } ,
141+ RawByteStr ( UnvalidatedRawStr ) ,
142+ }
143+
/// Raw-string token data exactly as the lexer recorded it, before any validation.
///
/// Use [`UnvalidatedRawStr::validate`] to turn this into either a [`ValidatedRawStr`] or a
/// [`LexRawStrError`] describing what is wrong with the literal.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct UnvalidatedRawStr {
    /// Whether the opening `"` was found right after the leading `#`s.
    valid_start: bool,
    /// Number of `#`s counted before the opening `"`.
    n_start_hashes: usize,
    /// Number of `#`s counted after a closing `"`. Equal to `n_start_hashes` when the string was
    /// terminated; otherwise the largest partial run seen before the end of input (0 if none).
    n_end_hashes: usize,
    /// Offset of the most plausible (but too short) terminator, if one was seen.
    /// Always `None` for a terminated string.
    possible_terminator_offset: Option<usize>,
}

/// Reasons a lexed raw string can fail validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LexRawStrError {
    /// Non-`#` characters between `r` and `"`, e.g. `r#~"..`.
    InvalidStarter,
    /// The string was never terminated. `possible_terminator_offset` is the best guess of where
    /// the author may have intended to terminate it.
    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
    /// More than 65535 `#` signs: the delimiter count has to fit in a `u16`.
    TooManyDelimiters,
}

/// A raw string that passed [`UnvalidatedRawStr::validate`].
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct ValidatedRawStr {
    /// Number of `#`s on each side of the string.
    n_hashes: u16,
}

impl ValidatedRawStr {
    /// Returns the number of `#`s delimiting the raw string on each side.
    pub fn num_hashes(&self) -> u16 {
        self.n_hashes
    }
}

impl UnvalidatedRawStr {
    /// Returns `true` if the opening `"` was found where it was expected.
    pub fn started(&self) -> bool {
        self.valid_start
    }

    /// Checks the recorded data and converts it into a [`ValidatedRawStr`].
    ///
    /// # Errors
    ///
    /// Checks are performed in this order, and the first failure is returned:
    ///
    /// * [`LexRawStrError::InvalidStarter`] if the opening `"` was missing.
    /// * [`LexRawStrError::TooManyDelimiters`] if the number of opening `#`s does not fit in a
    ///   `u16` (i.e. exceeds 65535).
    /// * [`LexRawStrError::NoTerminator`] if fewer closing `#`s than opening `#`s were found.
    pub fn validate(self) -> Result<ValidatedRawStr, LexRawStrError> {
        if !self.valid_start {
            return Err(LexRawStrError::InvalidStarter);
        }

        // Narrowing to `u16` is what enforces the 65535 delimiter limit.
        let n_hashes: u16 =
            self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?;

        if self.n_start_hashes > self.n_end_hashes {
            return Err(LexRawStrError::NoTerminator {
                expected: self.n_start_hashes,
                found: self.n_end_hashes,
                possible_terminator_offset: self.possible_terminator_offset,
            });
        }

        // The lexer stops counting closing `#`s once it has matched the opening count, so at
        // this point the two counts are expected to be equal.
        debug_assert_eq!(self.n_start_hashes, self.n_end_hashes);
        Ok(ValidatedRawStr { n_hashes })
    }
}
139199
140200/// Base of numeric literal encoding according to its prefix.
@@ -209,7 +269,7 @@ pub fn is_whitespace(c: char) -> bool {
209269 // Dedicated whitespace characters from Unicode
210270 | '\u{2028}' // LINE SEPARATOR
211271 | '\u{2029}' // PARAGRAPH SEPARATOR
212- => true ,
272+ => true ,
213273 _ => false ,
214274 }
215275}
@@ -258,12 +318,12 @@ impl Cursor<'_> {
258318 'r' => match ( self . first ( ) , self . second ( ) ) {
259319 ( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
260320 ( '#' , _) | ( '"' , _) => {
261- let ( n_hashes , started , terminated ) = self . raw_double_quoted_string ( ) ;
321+ let raw_str_i = self . raw_double_quoted_string ( 1 ) ;
262322 let suffix_start = self . len_consumed ( ) ;
263- if terminated {
323+ if raw_str_i . n_end_hashes == raw_str_i . n_start_hashes {
264324 self . eat_literal_suffix ( ) ;
265325 }
266- let kind = RawStr { n_hashes , started , terminated } ;
326+ let kind = RawStr ( raw_str_i ) ;
267327 Literal { kind, suffix_start }
268328 }
269329 _ => self . ident ( ) ,
@@ -293,12 +353,14 @@ impl Cursor<'_> {
293353 }
294354 ( 'r' , '"' ) | ( 'r' , '#' ) => {
295355 self . bump ( ) ;
296- let ( n_hashes , started , terminated ) = self . raw_double_quoted_string ( ) ;
356+ let raw_str_i = self . raw_double_quoted_string ( 2 ) ;
297357 let suffix_start = self . len_consumed ( ) ;
358+ let terminated = raw_str_i. n_start_hashes == raw_str_i. n_end_hashes ;
298359 if terminated {
299360 self . eat_literal_suffix ( ) ;
300361 }
301- let kind = RawByteStr { n_hashes, started, terminated } ;
362+
363+ let kind = RawByteStr ( raw_str_i) ;
302364 Literal { kind, suffix_start }
303365 }
304366 _ => self . ident ( ) ,
@@ -594,37 +656,49 @@ impl Cursor<'_> {
594656 false
595657 }
596658
597- /// Eats the double-quoted string and returns a tuple of
598- /// (amount of the '#' symbols, raw string started, raw string terminated)
599- fn raw_double_quoted_string ( & mut self ) -> ( usize , bool , bool ) {
659+ /// Eats the double-quoted string an UnvalidatedRawStr
660+ fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> UnvalidatedRawStr {
600661 debug_assert ! ( self . prev( ) == 'r' ) ;
601- let mut started: bool = false ;
602- let mut finished: bool = false ;
662+ let mut valid_start: bool = false ;
663+ let start_pos = self . len_consumed ( ) ;
664+ let ( mut possible_terminator_offset, mut max_hashes) = ( None , 0 ) ;
603665
604666 // Count opening '#' symbols.
605- let n_hashes = self . eat_while ( |c| c == '#' ) ;
667+ let n_start_hashes = self . eat_while ( |c| c == '#' ) ;
606668
607669 // Check that string is started.
608670 match self . bump ( ) {
609- Some ( '"' ) => started = true ,
610- _ => return ( n_hashes, started, finished) ,
671+ Some ( '"' ) => valid_start = true ,
672+ _ => {
673+ return UnvalidatedRawStr {
674+ valid_start,
675+ n_start_hashes,
676+ n_end_hashes : 0 ,
677+ possible_terminator_offset,
678+ } ;
679+ }
611680 }
612681
613682 // Skip the string contents and on each '#' character met, check if this is
614683 // a raw string termination.
615- while !finished {
684+ loop {
616685 self . eat_while ( |c| c != '"' ) ;
617686
618687 if self . is_eof ( ) {
619- return ( n_hashes, started, finished) ;
688+ return UnvalidatedRawStr {
689+ valid_start,
690+ n_start_hashes,
691+ n_end_hashes : max_hashes,
692+ possible_terminator_offset,
693+ } ;
620694 }
621695
622696 // Eat closing double quote.
623697 self . bump ( ) ;
624698
625699 // Check that amount of closing '#' symbols
626700 // is equal to the amount of opening ones.
627- let mut hashes_left = n_hashes ;
701+ let mut hashes_left = n_start_hashes ;
628702 let is_closing_hash = |c| {
629703 if c == '#' && hashes_left != 0 {
630704 hashes_left -= 1 ;
@@ -633,10 +707,23 @@ impl Cursor<'_> {
633707 false
634708 }
635709 } ;
636- finished = self . eat_while ( is_closing_hash) == n_hashes;
710+ let n_end_hashes = self . eat_while ( is_closing_hash) ;
711+
712+ if n_end_hashes == n_start_hashes {
713+ return UnvalidatedRawStr {
714+ valid_start,
715+ n_start_hashes,
716+ n_end_hashes,
717+ possible_terminator_offset : None ,
718+ } ;
719+ } else if n_end_hashes > 0 && n_end_hashes > max_hashes {
720+ // Keep track of possible terminators to give a hint about where there might be
721+ // a missing terminator
722+ possible_terminator_offset =
723+ Some ( self . len_consumed ( ) - start_pos - n_end_hashes + prefix_len) ;
724+ max_hashes = n_end_hashes;
725+ }
637726 }
638-
639- ( n_hashes, started, finished)
640727 }
641728
642729 fn eat_decimal_digits ( & mut self ) -> bool {
0 commit comments