@@ -118,6 +118,8 @@ impl<'a> Drop for Parser<'a> {
 struct TokenCursor {
     frame: TokenCursorFrame,
     stack: Vec<TokenCursorFrame>,
+    cur_token: Option<TreeAndJoint>,
+    collecting: Option<Collecting>,
 }
 
 #[derive(Clone)]
@@ -127,30 +129,24 @@ struct TokenCursorFrame {
     open_delim: bool,
     tree_cursor: tokenstream::Cursor,
     close_delim: bool,
-    last_token: LastToken,
 }
 
-/// This is used in `TokenCursorFrame` above to track tokens that are consumed
-/// by the parser, and then that's transitively used to record the tokens that
-/// each parse AST item is created with.
-///
-/// Right now this has two states, either collecting tokens or not collecting
-/// tokens. If we're collecting tokens we just save everything off into a local
-/// `Vec`. This should eventually though likely save tokens from the original
-/// token stream and just use slicing of token streams to avoid creation of a
-/// whole new vector.
-///
-/// The second state is where we're passively not recording tokens, but the last
-/// token is still tracked for when we want to start recording tokens. This
-/// "last token" means that when we start recording tokens we'll want to ensure
-/// that this, the first token, is included in the output.
-///
-/// You can find some more example usage of this in the `collect_tokens` method
-/// on the parser.
-#[derive(Clone)]
-enum LastToken {
-    Collecting(Vec<TreeAndJoint>),
-    Was(Option<TreeAndJoint>),
+/// Used to track additional state needed by `collect_tokens`
+#[derive(Clone, Debug)]
+struct Collecting {
+    /// Holds the current tokens captured during the most
+    /// recent call to `collect_tokens`
+    buf: Vec<TreeAndJoint>,
+    /// The depth of the `TokenCursor` stack at the time
+    /// collection was started. When we encounter a `TokenTree::Delimited`,
+    /// we want to record the `TokenTree::Delimited` itself,
+    /// but *not* any of the inner tokens while we are inside
+    /// the new frame (this would cause us to record duplicate tokens).
+    ///
+    /// This `depth` field tracks the stack depth at which we are recording
+    /// tokens. Only tokens encountered at this depth will be recorded. See
+    /// `TokenCursor::next` for more details.
+    depth: usize,
 }
 
 impl TokenCursorFrame {
@@ -161,7 +157,6 @@ impl TokenCursorFrame {
             open_delim: delim == token::NoDelim,
             tree_cursor: tts.clone().into_trees(),
             close_delim: delim == token::NoDelim,
-            last_token: LastToken::Was(None),
         }
     }
 }
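
To see why `Collecting::depth` is needed, here is a minimal, self-contained sketch of the same depth-gated recording idea. The `Tree` and `Cursor` types below are hypothetical stand-ins, not the real rustc `TokenTree`/`TokenCursor`; only the gating logic mirrors the patch.

#[derive(Clone, Debug)]
enum Tree {
    Token(char),
    Delimited(Vec<Tree>),
}

struct Cursor {
    frame: (Vec<Tree>, usize),              // current trees + position
    stack: Vec<(Vec<Tree>, usize)>,         // suspended outer frames
    collecting: Option<(Vec<Tree>, usize)>, // (buf, depth at which we record)
}

impl Cursor {
    fn next(&mut self) -> Option<Tree> {
        loop {
            let (trees, pos) = &mut self.frame;
            if let Some(tree) = trees.get(*pos).cloned() {
                *pos += 1;
                // Record only at the depth where collection started: a
                // `Delimited` group is recorded once, as a whole, and its
                // inner tokens are skipped while we are inside its frame.
                if let Some((buf, depth)) = &mut self.collecting {
                    if *depth == self.stack.len() {
                        buf.push(tree.clone());
                    }
                }
                // Descend into a delimited group by pushing a new frame.
                if let Tree::Delimited(inner) = &tree {
                    let outer = std::mem::replace(&mut self.frame, (inner.clone(), 0));
                    self.stack.push(outer);
                }
                return Some(tree);
            } else if let Some(outer) = self.stack.pop() {
                self.frame = outer; // frame exhausted: resume the outer one
            } else {
                return None;
            }
        }
    }
}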
@@ -171,25 +166,38 @@ impl TokenCursor {
         loop {
             let tree = if !self.frame.open_delim {
                 self.frame.open_delim = true;
-                TokenTree::open_tt(self.frame.span, self.frame.delim)
-            } else if let Some(tree) = self.frame.tree_cursor.next() {
+                TokenTree::open_tt(self.frame.span, self.frame.delim).into()
+            } else if let Some(tree) = self.frame.tree_cursor.next_with_joint() {
                 tree
             } else if !self.frame.close_delim {
                 self.frame.close_delim = true;
-                TokenTree::close_tt(self.frame.span, self.frame.delim)
+                TokenTree::close_tt(self.frame.span, self.frame.delim).into()
             } else if let Some(frame) = self.stack.pop() {
                 self.frame = frame;
                 continue;
             } else {
                 return Token::new(token::Eof, DUMMY_SP);
             };
 
-            match self.frame.last_token {
-                LastToken::Collecting(ref mut v) => v.push(tree.clone().into()),
-                LastToken::Was(ref mut t) => *t = Some(tree.clone().into()),
+            // Don't set an open delimiter as our current token - we want
+            // to leave it as the full `TokenTree::Delimited` from the previous
+            // iteration of this loop
+            if !matches!(tree.0, TokenTree::Token(Token { kind: TokenKind::OpenDelim(_), .. })) {
+                self.cur_token = Some(tree.clone());
+            }
+
+            if let Some(collecting) = &mut self.collecting {
+                if collecting.depth == self.stack.len() {
+                    debug!(
+                        "TokenCursor::next(): collected {:?} at depth {:?}",
+                        tree,
+                        self.stack.len()
+                    );
+                    collecting.buf.push(tree.clone().into())
+                }
             }
 
-            match tree {
+            match tree.0 {
                 TokenTree::Token(token) => return token,
                 TokenTree::Delimited(sp, delim, tts) => {
                     let frame = TokenCursorFrame::new(sp, delim, &tts);
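
Continuing the sketch above: for a stream shaped like `( a ) . b`, a collection started at the outer depth records the `Delimited` group once, then `.` and `b`, and never records the inner `a` on its own, which is exactly the duplication the `depth` field prevents.

fn main() {
    let stream = vec![
        Tree::Delimited(vec![Tree::Token('a')]),
        Tree::Token('.'),
        Tree::Token('b'),
    ];
    let mut cur =
        Cursor { frame: (stream, 0), stack: Vec::new(), collecting: Some((Vec::new(), 0)) };
    while cur.next().is_some() {}
    let (buf, _depth) = cur.collecting.take().unwrap();
    println!("{:?}", buf); // [Delimited([Token('a')]), Token('.'), Token('b')]
}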
@@ -350,6 +358,8 @@ impl<'a> Parser<'a> {
             token_cursor: TokenCursor {
                 frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, &tokens),
                 stack: Vec::new(),
+                cur_token: None,
+                collecting: None,
             },
             desugar_doc_comments,
             unmatched_angle_bracket_count: 0,
@@ -1105,65 +1115,95 @@ impl<'a> Parser<'a> {
         }
     }
 
+    /// Records all tokens consumed by the provided callback,
+    /// including the current token. These tokens are collected
+    /// into a `TokenStream`, and returned along with the result
+    /// of the callback.
+    ///
+    /// Note: If your callback consumes an opening delimiter
+    /// (including the case where you call `collect_tokens`
+    /// when the current token is an opening delimiter),
+    /// you must also consume the corresponding closing delimiter.
+    ///
+    /// That is, you can consume
+    /// `something ([{ }])` or `([{}])`, but not `([{}]`.
+    ///
+    /// This restriction shouldn't be an issue in practice,
+    /// since this function is used to record the tokens for
+    /// a parsed AST item, which always has matching delimiters.
     fn collect_tokens<R>(
         &mut self,
         f: impl FnOnce(&mut Self) -> PResult<'a, R>,
     ) -> PResult<'a, (R, TokenStream)> {
         // Record all tokens we parse when parsing this item.
-        let mut tokens = Vec::new();
-        let prev_collecting = match self.token_cursor.frame.last_token {
-            LastToken::Collecting(ref mut list) => Some(mem::take(list)),
-            LastToken::Was(ref mut last) => {
-                tokens.extend(last.take());
-                None
-            }
-        };
-        self.token_cursor.frame.last_token = LastToken::Collecting(tokens);
-        let prev = self.token_cursor.stack.len();
+        let tokens: Vec<TreeAndJoint> = self.token_cursor.cur_token.clone().into_iter().collect();
+        debug!("collect_tokens: starting with {:?}", tokens);
+
+        // We need special handling for the case where `collect_tokens` is called
+        // on an opening delimiter (e.g. '('). At this point, we have already pushed
+        // a new frame - however, we want to record the original `TokenTree::Delimited`,
+        // for consistency with the case where we start recording one token earlier.
+        // See `TokenCursor::next` to see how `cur_token` is set up.
+        let prev_depth =
+            if matches!(self.token_cursor.cur_token, Some((TokenTree::Delimited(..), _))) {
+                if self.token_cursor.stack.is_empty() {
+                    // There is nothing below us in the stack that
+                    // the function could consume, so the only thing it can legally
+                    // capture is the entire contents of the current frame.
+                    return Ok((f(self)?, TokenStream::new(tokens)));
+                }
+                // We have already recorded the full `TokenTree::Delimited` when we created
+                // our `tokens` vector at the start of this function. We are now inside
+                // a new frame corresponding to the `TokenTree::Delimited` we already recorded.
+                // We don't want to record any of the tokens inside this frame, since they
+                // will be duplicates of the tokens nested inside the `TokenTree::Delimited`.
+                // Therefore, we set our recording depth to the *previous* frame. This allows
+                // us to record a sequence like `(foo).bar()`: the `(foo)` will be recorded
+                // as our initial `cur_token`, while the `.bar()` will be recorded after we
+                // pop the `(foo)` frame.
+                self.token_cursor.stack.len() - 1
+            } else {
+                self.token_cursor.stack.len()
+            };
+        let prev_collecting =
+            self.token_cursor.collecting.replace(Collecting { buf: tokens, depth: prev_depth });
+
         let ret = f(self);
-        let last_token = if self.token_cursor.stack.len() == prev {
-            &mut self.token_cursor.frame.last_token
-        } else if self.token_cursor.stack.get(prev).is_none() {
-            // This can happen due to a bad interaction of two unrelated recovery mechanisms with
-            // mismatched delimiters *and* recovery lookahead on the likely typo `pub ident(`
-            // (#62881).
-            return Ok((ret?, TokenStream::default()));
+
+        let mut collected_tokens = if let Some(collecting) = self.token_cursor.collecting.take() {
+            collecting.buf
         } else {
-            &mut self.token_cursor.stack[prev].last_token
+            let msg = format!("our vector went away?");
+            debug!("collect_tokens: {}", msg);
+            self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg);
+            // This can happen due to a bad interaction of two unrelated recovery mechanisms
+            // with mismatched delimiters *and* recovery lookahead on the likely typo
+            // `pub ident(` (#62895, different but similar to the case above).
+            return Ok((ret?, TokenStream::default()));
         };
 
-        // Pull out the tokens that we've collected from the call to `f` above.
-        let mut collected_tokens = match *last_token {
-            LastToken::Collecting(ref mut v) => mem::take(v),
-            LastToken::Was(ref was) => {
-                let msg = format!("our vector went away? - found Was({:?})", was);
-                debug!("collect_tokens: {}", msg);
-                self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg);
-                // This can happen due to a bad interaction of two unrelated recovery mechanisms
-                // with mismatched delimiters *and* recovery lookahead on the likely typo
-                // `pub ident(` (#62895, different but similar to the case above).
-                return Ok((ret?, TokenStream::default()));
-            }
-        };
+        debug!("collect_tokens: got raw tokens {:?}", collected_tokens);
 
         // If we're not at EOF our current token wasn't actually consumed by
         // `f`, but it'll still be in our list that we pulled out. In that case
         // put it back.
         let extra_token = if self.token != token::Eof { collected_tokens.pop() } else { None };
 
-        // If we were previously collecting tokens, then this was a recursive
-        // call. In that case we need to record all the tokens we collected in
-        // our parent list as well. To do that we push a clone of our stream
-        // onto the previous list.
-        match prev_collecting {
-            Some(mut list) => {
-                list.extend(collected_tokens.iter().cloned());
-                list.extend(extra_token);
-                *last_token = LastToken::Collecting(list);
-            }
-            None => {
-                *last_token = LastToken::Was(extra_token);
+        if let Some(mut collecting) = prev_collecting {
+            // If we were previously collecting at the same depth,
+            // then the previous call to `collect_tokens` needs to see
+            // the tokens we just recorded.
+            //
+            // If we were previously recording at a lower `depth`,
+            // then the previous `collect_tokens` call already recorded
+            // this entire frame in the form of a `TokenTree::Delimited`,
+            // so there is nothing else for us to do.
+            if collecting.depth == prev_depth {
+                collecting.buf.extend(collected_tokens.iter().cloned());
+                collecting.buf.extend(extra_token);
+                debug!("collect_tokens: updating previous buf to {:?}", collecting);
             }
+            self.token_cursor.collecting = Some(collecting)
         }
 
         Ok((ret?, TokenStream::new(collected_tokens)))
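
For orientation, a hedged sketch of a call site (the `parse_thing` helper is hypothetical; real callers live elsewhere in the parser). It shows the shape promised by the doc comment above: the callback's result comes back together with every token it consumed.

// Inside some `impl<'a> Parser<'a>` method (sketch only):
let (thing, tokens) = self.collect_tokens(|this| this.parse_thing())?;
// `tokens` includes the token that was current when `collect_tokens` was
// called, and the callback must leave delimiters balanced: it may consume
// `([{}])`, but not `([{}]`.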