11use rustc_ast:: ast:: AttrStyle ;
22use rustc_ast:: token:: { self , CommentKind , Token , TokenKind } ;
3+ use rustc_ast:: tokenstream:: IsJoint ;
34use rustc_data_structures:: sync:: Lrc ;
45use rustc_errors:: { error_code, Applicability , DiagnosticBuilder , FatalError } ;
56use rustc_lexer:: Base ;
@@ -65,42 +66,46 @@ impl<'a> StringReader<'a> {
6566 self . override_span . unwrap_or_else ( || Span :: with_root_ctxt ( lo, hi) )
6667 }
6768
68- /// Returns the next token, including trivia like whitespace or comments.
69- fn next_token ( & mut self ) -> Token {
69+ /// Returns the next token, and info about preceding whitespace, if any.
70+ fn next_token ( & mut self ) -> ( IsJoint , Token ) {
71+ let mut is_joint = IsJoint :: Joint ;
72+
73+ // Skip `#!` at the start of the file
7074 let start_src_index = self . src_index ( self . pos ) ;
7175 let text: & str = & self . src [ start_src_index..self . end_src_index ] ;
72-
73- if text. is_empty ( ) {
74- let span = self . mk_sp ( self . pos , self . pos ) ;
75- return Token :: new ( token:: Eof , span) ;
76+ let is_beginning_of_file = self . pos == self . start_pos ;
77+ if is_beginning_of_file {
78+ if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
79+ self . pos = self . pos + BytePos :: from_usize ( shebang_len) ;
80+ is_joint = IsJoint :: NonJoint ;
81+ }
7682 }
7783
78- {
79- let is_beginning_of_file = self . pos == self . start_pos ;
80- if is_beginning_of_file {
81- if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
82- let start = self . pos ;
83- self . pos = self . pos + BytePos :: from_usize ( shebang_len) ;
84+ // Skip trivial (whitespace & comments) tokens
85+ loop {
86+ let start_src_index = self . src_index ( self . pos ) ;
87+ let text: & str = & self . src [ start_src_index..self . end_src_index ] ;
8488
85- let sym = self . symbol_from ( start + BytePos :: from_usize ( "#!" . len ( ) ) ) ;
86- let kind = token:: Shebang ( sym) ;
87-
88- let span = self . mk_sp ( start, self . pos ) ;
89- return Token :: new ( kind, span) ;
90- }
89+ if text. is_empty ( ) {
90+ let span = self . mk_sp ( self . pos , self . pos ) ;
91+ return ( is_joint, Token :: new ( token:: Eof , span) ) ;
9192 }
92- }
9393
94- let token = rustc_lexer:: first_token ( text) ;
94+ let token = rustc_lexer:: first_token ( text) ;
9595
96- let start = self . pos ;
97- self . pos = self . pos + BytePos :: from_usize ( token. len ) ;
96+ let start = self . pos ;
97+ self . pos = self . pos + BytePos :: from_usize ( token. len ) ;
9898
99- debug ! ( "try_next_token : {:?}({:?})" , token. kind, self . str_from( start) ) ;
99+ debug ! ( "next_token : {:?}({:?})" , token. kind, self . str_from( start) ) ;
100100
101- let kind = self . cook_lexer_token ( token. kind , start) ;
102- let span = self . mk_sp ( start, self . pos ) ;
103- Token :: new ( kind, span)
101+ match self . cook_lexer_token ( token. kind , start) {
102+ Some ( kind) => {
103+ let span = self . mk_sp ( start, self . pos ) ;
104+ return ( is_joint, Token :: new ( kind, span) ) ;
105+ }
106+ None => is_joint = IsJoint :: NonJoint ,
107+ }
108+ }
104109 }
105110
106111 /// Report a fatal lexical error with a given span.
@@ -140,19 +145,16 @@ impl<'a> StringReader<'a> {
140145 /// Turns simple `rustc_lexer::TokenKind` enum into a rich
141146 /// `librustc_ast::TokenKind`. This turns strings into interned
142147 /// symbols and runs additional validation.
143- fn cook_lexer_token ( & self , token : rustc_lexer:: TokenKind , start : BytePos ) -> TokenKind {
144- match token {
148+ fn cook_lexer_token ( & self , token : rustc_lexer:: TokenKind , start : BytePos ) -> Option < TokenKind > {
149+ Some ( match token {
145150 rustc_lexer:: TokenKind :: LineComment { doc_style } => {
146- match doc_style {
147- Some ( doc_style) => {
148- // Opening delimiter of the length 3 is not included into the symbol.
149- let content_start = start + BytePos ( 3 ) ;
150- let content = self . str_from ( content_start) ;
151+ // Skip non-doc comments
152+ let doc_style = doc_style?;
151153
152- self . cook_doc_comment ( content_start , content , CommentKind :: Line , doc_style )
153- }
154- None => token :: Comment ,
155- }
154+ // Opening delimiter of the length 3 is not included into the symbol.
155+ let content_start = start + BytePos ( 3 ) ;
156+ let content = self . str_from ( content_start ) ;
157+ self . cook_doc_comment ( content_start , content , CommentKind :: Line , doc_style )
156158 }
157159 rustc_lexer:: TokenKind :: BlockComment { doc_style, terminated } => {
158160 if !terminated {
@@ -171,20 +173,18 @@ impl<'a> StringReader<'a> {
171173 . emit ( ) ;
172174 FatalError . raise ( ) ;
173175 }
174- match doc_style {
175- Some ( doc_style) => {
176- // Opening delimiter of the length 3 and closing delimiter of the length 2
177- // are not included into the symbol.
178- let content_start = start + BytePos ( 3 ) ;
179- let content_end = self . pos - BytePos ( if terminated { 2 } else { 0 } ) ;
180- let content = self . str_from_to ( content_start, content_end) ;
181-
182- self . cook_doc_comment ( content_start, content, CommentKind :: Block , doc_style)
183- }
184- None => token:: Comment ,
185- }
176+
177+ // Skip non-doc comments
178+ let doc_style = doc_style?;
179+
180+ // Opening delimiter of the length 3 and closing delimiter of the length 2
181+ // are not included into the symbol.
182+ let content_start = start + BytePos ( 3 ) ;
183+ let content_end = self . pos - BytePos ( if terminated { 2 } else { 0 } ) ;
184+ let content = self . str_from_to ( content_start, content_end) ;
185+ self . cook_doc_comment ( content_start, content, CommentKind :: Block , doc_style)
186186 }
187- rustc_lexer:: TokenKind :: Whitespace => token :: Whitespace ,
187+ rustc_lexer:: TokenKind :: Whitespace => return None ,
188188 rustc_lexer:: TokenKind :: Ident | rustc_lexer:: TokenKind :: RawIdent => {
189189 let is_raw_ident = token == rustc_lexer:: TokenKind :: RawIdent ;
190190 let mut ident_start = start;
@@ -282,12 +282,11 @@ impl<'a> StringReader<'a> {
282282 // this should be inside `rustc_lexer`. However, we should first remove compound
283283 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
284284 // as there will be less overall work to do this way.
285- let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err)
286- . unwrap_or_else ( || token:: Unknown ( self . symbol_from ( start) ) ) ;
285+ let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err) ;
287286 err. emit ( ) ;
288- token
287+ token?
289288 }
290- }
289+ } )
291290 }
292291
293292 fn cook_doc_comment (
@@ -450,12 +449,6 @@ impl<'a> StringReader<'a> {
450449 self . str_from_to ( start, self . pos )
451450 }
452451
453- /// Creates a Symbol from a given offset to the current offset.
454- fn symbol_from ( & self , start : BytePos ) -> Symbol {
455- debug ! ( "taking an ident from {:?} to {:?}" , start, self . pos) ;
456- Symbol :: intern ( self . str_from ( start) )
457- }
458-
459452 /// Creates a Symbol from a given start offset to an explicit end offset.
460453 fn symbol_from_to ( & self , start : BytePos , end : BytePos ) -> Symbol {
461454 debug ! ( "taking an ident from {:?} to {:?}" , start, end) ;
0 commit comments