@@ -29,9 +29,11 @@ pub mod unescape;
 #[cfg(test)]
 mod tests;
 
+pub use crate::cursor::Cursor;
+
 use self::LiteralKind::*;
 use self::TokenKind::*;
-use crate::cursor::{Cursor, EOF_CHAR};
+use crate::cursor::EOF_CHAR;
 use std::convert::TryFrom;
 
 /// Parsed token.
@@ -139,6 +141,9 @@ pub enum TokenKind {
 
     /// Unknown token, not expected by the lexer, e.g. "№"
     Unknown,
+
+    /// End of input.
+    Eof,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -219,13 +224,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     None
 }
 
-/// Parses the first token from the provided input string.
-#[inline]
-pub fn first_token(input: &str) -> Token {
-    debug_assert!(!input.is_empty());
-    Cursor::new(input).advance_token()
-}
-
 /// Validates a raw string literal. Used for getting more information about a
 /// problem with a `RawStr`/`RawByteStr` with a `None` field.
 #[inline]
@@ -243,12 +241,8 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
     let mut cursor = Cursor::new(input);
     std::iter::from_fn(move || {
-        if cursor.is_eof() {
-            None
-        } else {
-            cursor.reset_len_consumed();
-            Some(cursor.advance_token())
-        }
+        let token = cursor.advance_token();
+        if token.kind != TokenKind::Eof { Some(token) } else { None }
     })
 }
 
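Callers that used the removed `first_token` helper can get the same behavior from the now-public `Cursor` and `advance_token`. A minimal sketch, not part of this diff; note that an empty input now yields a zero-length `TokenKind::Eof` token instead of tripping a debug assertion:

```rust
use rustc_lexer::{Cursor, Token};

// Sketch of a drop-in replacement for the removed `first_token` helper,
// built on the now-public `Cursor::advance_token`.
fn first_token(input: &str) -> Token {
    // Empty input produces a `TokenKind::Eof` token with length 0.
    Cursor::new(input).advance_token()
}
```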
@@ -311,8 +305,11 @@ pub fn is_ident(string: &str) -> bool {
 
 impl Cursor<'_> {
     /// Parses a token from the input string.
-    fn advance_token(&mut self) -> Token {
-        let first_char = self.bump().unwrap();
+    pub fn advance_token(&mut self) -> Token {
+        let first_char = match self.bump() {
+            Some(c) => c,
+            None => return Token::new(TokenKind::Eof, 0),
+        };
         let token_kind = match first_char {
             // Slash, comment or block comment.
             '/' => match self.first() {
@@ -329,7 +326,7 @@ impl Cursor<'_> {
                 ('#', c1) if is_id_start(c1) => self.raw_ident(),
                 ('#', _) | ('"', _) => {
                     let res = self.raw_double_quoted_string(1);
-                    let suffix_start = self.len_consumed();
+                    let suffix_start = self.pos_within_token();
                     if res.is_ok() {
                         self.eat_literal_suffix();
                     }
@@ -344,7 +341,7 @@ impl Cursor<'_> {
                 ('\'', _) => {
                     self.bump();
                     let terminated = self.single_quoted_string();
-                    let suffix_start = self.len_consumed();
+                    let suffix_start = self.pos_within_token();
                     if terminated {
                         self.eat_literal_suffix();
                     }
@@ -354,7 +351,7 @@ impl Cursor<'_> {
                 ('"', _) => {
                     self.bump();
                     let terminated = self.double_quoted_string();
-                    let suffix_start = self.len_consumed();
+                    let suffix_start = self.pos_within_token();
                     if terminated {
                         self.eat_literal_suffix();
                     }
@@ -364,7 +361,7 @@ impl Cursor<'_> {
                 ('r', '"') | ('r', '#') => {
                     self.bump();
                     let res = self.raw_double_quoted_string(2);
-                    let suffix_start = self.len_consumed();
+                    let suffix_start = self.pos_within_token();
                     if res.is_ok() {
                         self.eat_literal_suffix();
                     }
@@ -381,7 +378,7 @@ impl Cursor<'_> {
             // Numeric literal.
             c @ '0'..='9' => {
                 let literal_kind = self.number(c);
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 self.eat_literal_suffix();
                 TokenKind::Literal { kind: literal_kind, suffix_start }
             }
@@ -420,7 +417,7 @@ impl Cursor<'_> {
             // String literal.
             '"' => {
                 let terminated = self.double_quoted_string();
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 if terminated {
                     self.eat_literal_suffix();
                 }
@@ -433,7 +430,9 @@ impl Cursor<'_> {
             }
             _ => Unknown,
         };
-        Token::new(token_kind, self.len_consumed())
+        let res = Token::new(token_kind, self.pos_within_token());
+        self.reset_pos_within_token();
+        res
     }
 
     fn line_comment(&mut self) -> TokenKind {
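Because `advance_token` now resets `pos_within_token` itself before returning, each token's length covers exactly one token and callers no longer reset anything between calls. A minimal sketch of recovering byte spans by accumulating lengths, assuming the public `Token { kind, len }` layout used in this crate (the helper itself is not part of the diff):

```rust
use rustc_lexer::{Cursor, TokenKind};

// Accumulate each token's length into (start, end) byte offsets.
fn token_spans(input: &str) -> Vec<(usize, usize)> {
    let mut cursor = Cursor::new(input);
    let mut start = 0usize;
    let mut spans = Vec::new();
    loop {
        let token = cursor.advance_token();
        if token.kind == TokenKind::Eof {
            break;
        }
        let end = start + token.len as usize;
        spans.push((start, end));
        start = end;
    }
    spans
}
```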
@@ -618,7 +617,7 @@ impl Cursor<'_> {
 
         if !can_be_a_lifetime {
             let terminated = self.single_quoted_string();
-            let suffix_start = self.len_consumed();
+            let suffix_start = self.pos_within_token();
             if terminated {
                 self.eat_literal_suffix();
             }
@@ -643,7 +642,7 @@ impl Cursor<'_> {
         if self.first() == '\'' {
             self.bump();
             let kind = Char { terminated: true };
-            Literal { kind, suffix_start: self.len_consumed() }
+            Literal { kind, suffix_start: self.pos_within_token() }
         } else {
             Lifetime { starts_with_number }
         }
@@ -724,7 +723,7 @@ impl Cursor<'_> {
 
     fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
         debug_assert!(self.prev() == 'r');
-        let start_pos = self.len_consumed();
+        let start_pos = self.pos_within_token();
         let mut possible_terminator_offset = None;
         let mut max_hashes = 0;
 
@@ -778,7 +777,7 @@ impl Cursor<'_> {
                 // Keep track of possible terminators to give a hint about
                 // where there might be a missing terminator
                 possible_terminator_offset =
-                    Some(self.len_consumed() - start_pos - n_end_hashes + prefix_len);
+                    Some(self.pos_within_token() - start_pos - n_end_hashes + prefix_len);
                 max_hashes = n_end_hashes;
             }
         }