@@ -25,7 +25,8 @@ use rustc_serialize::{Decodable, Encodable};
 use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};
 
 use std::borrow::Cow;
-use std::{cmp, fmt, iter};
+use std::ops::Range;
+use std::{cmp, fmt, iter, mem};
 
 /// Part of a `TokenStream`.
 #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
@@ -106,25 +107,30 @@ where
     }
 }
 
-pub trait ToAttrTokenStream: sync::DynSend + sync::DynSync {
-    fn to_attr_token_stream(&self) -> AttrTokenStream;
-}
-
-impl ToAttrTokenStream for AttrTokenStream {
-    fn to_attr_token_stream(&self) -> AttrTokenStream {
-        self.clone()
-    }
-}
-
-/// A lazy version of [`TokenStream`], which defers creation
-/// of an actual `TokenStream` until it is needed.
-/// `Box` is here only to reduce the structure size.
+/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
+/// `AttrTokenStream` until it is needed.
 #[derive(Clone)]
-pub struct LazyAttrTokenStream(Lrc<Box<dyn ToAttrTokenStream>>);
+pub struct LazyAttrTokenStream(Lrc<LazyAttrTokenStreamInner>);
 
 impl LazyAttrTokenStream {
-    pub fn new(inner: impl ToAttrTokenStream + 'static) -> LazyAttrTokenStream {
-        LazyAttrTokenStream(Lrc::new(Box::new(inner)))
+    pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
+        LazyAttrTokenStream(Lrc::new(LazyAttrTokenStreamInner::Direct(stream)))
+    }
+
+    pub fn new_pending(
+        start_token: (Token, Spacing),
+        cursor_snapshot: TokenCursor,
+        num_calls: u32,
+        break_last_token: bool,
+        replace_ranges: Box<[ReplaceRange]>,
+    ) -> LazyAttrTokenStream {
+        LazyAttrTokenStream(Lrc::new(LazyAttrTokenStreamInner::Pending {
+            start_token,
+            cursor_snapshot,
+            num_calls,
+            break_last_token,
+            replace_ranges,
+        }))
     }
 
     pub fn to_attr_token_stream(&self) -> AttrTokenStream {
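
Aside: the `Direct`/`Pending` split above replaces the old `dyn ToAttrTokenStream` trait object with a closed enum behind one `Lrc`. A minimal, runnable sketch of the same two-constructor lazy pattern; all names here (`Lazy`, `LazyInner`, `Stream`) are simplified stand-ins, not the real rustc types:

    use std::sync::Arc;

    // Simplified stand-in for `AttrTokenStream`.
    #[derive(Clone, Debug, PartialEq)]
    struct Stream(Vec<u32>);

    #[derive(Clone)]
    struct Lazy(Arc<LazyInner>);

    enum LazyInner {
        // Already materialized: producing the stream is just a clone.
        Direct(Stream),
        // Deferred: only enough state to rebuild the stream if it is ever asked for.
        Pending { source: Vec<u32>, num_calls: u32 },
    }

    impl Lazy {
        fn new_direct(stream: Stream) -> Lazy {
            Lazy(Arc::new(LazyInner::Direct(stream)))
        }

        fn new_pending(source: Vec<u32>, num_calls: u32) -> Lazy {
            Lazy(Arc::new(LazyInner::Pending { source, num_calls }))
        }

        fn to_stream(&self) -> Stream {
            match &*self.0 {
                LazyInner::Direct(stream) => stream.clone(),
                // Replay only the tokens the consumer actually took.
                LazyInner::Pending { source, num_calls } => {
                    Stream(source.iter().copied().take(*num_calls as usize).collect())
                }
            }
        }
    }

    fn main() {
        let lazy = Lazy::new_pending(vec![1, 2, 3, 4], 2);
        assert_eq!(lazy.to_stream(), Stream(vec![1, 2]));
    }

Cloning `Lazy` only bumps the reference count, which is what makes the parser cheap to clone in the real code.
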
@@ -156,12 +162,213 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
     }
 }
 
-/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
-/// information about the tokens for attribute targets. This is used
-/// during expansion to perform early cfg-expansion, and to process attributes
-/// during proc-macro invocations.
-#[derive(Clone, Debug, Default, Encodable, Decodable)]
-pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+/// Indicates a range of tokens that should be replaced by the tokens in the
+/// provided `AttrsTarget`. This is used in two places during token collection:
+///
+/// 1. During the parsing of an AST node that may have a `#[derive]` attribute,
+///    we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`. In this
+///    case, we use a `ReplaceRange` to replace the entire inner AST node with
+///    `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion on
+///    an `AttrTokenStream`.
+///
+/// 2. When we parse an inner attribute while collecting tokens. We remove
+///    inner attributes from the token stream entirely, and instead track them
+///    through the `attrs` field on the AST node. This allows us to easily
+///    manipulate them (for example, removing the first macro inner attribute
+///    to invoke a proc-macro). When we create a `TokenStream`, the inner
+///    attributes get inserted into the proper place in the token stream.
+pub type ReplaceRange = (Range<u32>, Option<AttrsTarget>);
+
+enum LazyAttrTokenStreamInner {
+    // The token stream has already been produced.
+    Direct(AttrTokenStream),
+
+    // Produces a `TokenStream` on-demand. Using `cursor_snapshot` and `num_calls`,
+    // we can reconstruct the `TokenStream` seen by the callback. This allows us to
+    // avoid producing a `TokenStream` if it is never needed - for example, a
+    // captured `macro_rules!` argument that is never passed to a proc macro. In
+    // practice, token stream creation happens rarely compared to calls to
+    // `collect_tokens` (see some statistics in #78736), so we are doing as little
+    // up-front work as possible.
+    //
+    // This also makes `Parser` very cheap to clone, since there is no intermediate
+    // collection buffer to clone.
+    Pending {
+        start_token: (Token, Spacing),
+        cursor_snapshot: TokenCursor,
+        num_calls: u32,
+        break_last_token: bool,
+        replace_ranges: Box<[ReplaceRange]>,
+    },
+}
+
+impl LazyAttrTokenStreamInner {
+    fn to_attr_token_stream(&self) -> AttrTokenStream {
+        match self {
+            LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
+            LazyAttrTokenStreamInner::Pending {
+                start_token,
+                cursor_snapshot,
+                num_calls,
+                break_last_token,
+                replace_ranges,
+            } => {
+                // The token produced by the final call to `{,inlined_}next`
+                // was not actually consumed by the callback. The combination
+                // of chaining the initial token and using `take` produces the
+                // desired result - we produce an empty `TokenStream` if no
+                // calls were made, and omit the final token otherwise.
+                let mut cursor_snapshot = cursor_snapshot.clone();
+                let tokens = iter::once(FlatToken::Token(start_token.clone()))
+                    .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
+                    .take(*num_calls as usize);
+
+                if replace_ranges.is_empty() {
+                    make_attr_token_stream(tokens, *break_last_token)
+                } else {
+                    let mut tokens: Vec<_> = tokens.collect();
+                    let mut replace_ranges = replace_ranges.to_vec();
+                    replace_ranges.sort_by_key(|(range, _)| range.start);
+
+                    #[cfg(debug_assertions)]
+                    {
+                        for [(range, tokens), (next_range, next_tokens)] in
+                            replace_ranges.array_windows()
+                        {
+                            assert!(
+                                range.end <= next_range.start || range.end >= next_range.end,
+                                "Replace ranges should either be disjoint or nested: \
+                                 ({:?}, {:?}) ({:?}, {:?})",
+                                range,
+                                tokens,
+                                next_range,
+                                next_tokens,
+                            );
+                        }
+                    }
+
+                    // Process the replace ranges, starting from the highest
+                    // start position and working our way back. If we have
+                    // tokens like:
+                    //
+                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+                    //
+                    // Then we will generate replace ranges for both the
+                    // `#[cfg(FALSE)] field: bool` and the entire
+                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+                    //
+                    // By starting processing from the replace range with the
+                    // greatest start position, we ensure that any replace
+                    // range which encloses another replace range will capture
+                    // the *replaced* tokens for the inner range, not the
+                    // original tokens.
+                    for (range, target) in replace_ranges.into_iter().rev() {
+                        assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
+
+                        // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
+                        // plus enough `FlatToken::Empty`s to fill up the rest of the range. This
+                        // keeps the total length of `tokens` constant throughout the replacement
+                        // process, allowing us to use all of the `ReplaceRanges` entries without
+                        // adjusting indices.
+                        let target_len = target.is_some() as usize;
+                        tokens.splice(
+                            (range.start as usize)..(range.end as usize),
+                            target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
+                                iter::repeat(FlatToken::Empty).take(range.len() - target_len),
+                            ),
+                        );
+                    }
+                    make_attr_token_stream(tokens.into_iter(), *break_last_token)
+                }
+            }
+        }
+    }
+}
+
+/// A helper type used when building an `AttrTokenStream` from a
+/// `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens are stored
+/// as `FlatToken::Token`. A vector of `FlatToken`s is then 'parsed' to build
+/// up an `AttrTokenStream` with nested `AttrTokenTree::Delimited` tokens.
+#[derive(Debug, Clone)]
+enum FlatToken {
+    /// A token. This holds both delimiter (e.g. '{' and '}') and non-delimiter
+    /// tokens.
+    Token((Token, Spacing)),
+    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
+    /// directly into the constructed `AttrTokenStream` as an
+    /// `AttrTokenTree::AttrsTarget`.
+    AttrsTarget(AttrsTarget),
+    /// A special 'empty' token that is ignored during the conversion to an
+    /// `AttrTokenStream`. This is used to simplify the handling of replace
+    /// ranges.
+    Empty,
+}
+
+/// Converts a flattened iterator of tokens (including open and close delimiter
+/// tokens) into an `AttrTokenStream`, creating an `AttrTokenTree::Delimited`
+/// for each matching pair of open and close delims.
+fn make_attr_token_stream(
+    iter: impl Iterator<Item = FlatToken>,
+    break_last_token: bool,
+) -> AttrTokenStream {
+    #[derive(Debug)]
+    struct FrameData {
+        // This is `None` for the first frame, `Some` for all others.
+        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
+        inner: Vec<AttrTokenTree>,
+    }
+    // The stack always has at least one element. Storing it separately makes for shorter code.
+    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
+    let mut stack_rest = vec![];
+    for flat_token in iter {
+        match flat_token {
+            FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => {
+                stack_rest.push(mem::replace(
+                    &mut stack_top,
+                    FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
+                ));
+            }
+            FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => {
+                let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
+                let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
+                assert_eq!(
+                    open_delim, delim,
+                    "Mismatched open/close delims: open={open_delim:?} close={span:?}"
+                );
+                let dspan = DelimSpan::from_pair(open_sp, span);
+                let dspacing = DelimSpacing::new(open_spacing, spacing);
+                let stream = AttrTokenStream::new(frame_data.inner);
+                let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
+                stack_top.inner.push(delimited);
+            }
+            FlatToken::Token((token, spacing)) => {
+                stack_top.inner.push(AttrTokenTree::Token(token, spacing))
+            }
+            FlatToken::AttrsTarget(target) => {
+                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
+            }
+            FlatToken::Empty => {}
+        }
+    }
+
+    if break_last_token {
+        let last_token = stack_top.inner.pop().unwrap();
+        if let AttrTokenTree::Token(last_token, spacing) = last_token {
+            let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
+
+            // An 'unglued' token is always two ASCII characters.
+            let mut first_span = last_token.span.shrink_to_lo();
+            first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
+
+            stack_top
+                .inner
+                .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
+        } else {
+            panic!("Unexpected last token {last_token:?}")
+        }
+    }
+    AttrTokenStream::new(stack_top.inner)
+}
 
 /// Like `TokenTree`, but for `AttrTokenStream`.
 #[derive(Clone, Debug, Encodable, Decodable)]
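
Aside: the invariant that makes the replace-range loop above correct is that `tokens.len()` never changes, so the indices recorded for outer (enclosing) ranges stay valid after inner ranges have been rewritten. A runnable sketch of that splice-and-pad step, where `Tok` and the `replace` helper are hypothetical stand-ins for `FlatToken` and the inline `splice` call:

    use std::iter;
    use std::ops::Range;

    #[derive(Clone, Debug, PartialEq)]
    enum Tok {
        Token(char),          // stands in for FlatToken::Token
        Target(&'static str), // stands in for FlatToken::AttrsTarget
        Empty,                // stands in for FlatToken::Empty
    }

    // Replace `range` with zero or one targets, padded with `Empty` so the
    // vector's length (and thus every other recorded range) is unchanged.
    fn replace(tokens: &mut Vec<Tok>, range: Range<usize>, target: Option<&'static str>) {
        let target_len = target.is_some() as usize;
        let pad = range.len() - target_len;
        tokens.splice(
            range,
            target.into_iter().map(Tok::Target).chain(iter::repeat(Tok::Empty).take(pad)),
        );
    }

    fn main() {
        let mut tokens: Vec<Tok> = "abcde".chars().map(Tok::Token).collect();
        let len_before = tokens.len();
        replace(&mut tokens, 1..4, Some("T")); // replace "bcd" with one target
        assert_eq!(tokens.len(), len_before);  // length is preserved
        assert_eq!(tokens[1], Tok::Target("T"));
        assert_eq!(tokens[2], Tok::Empty);
    }

Processing ranges from the highest start position backwards then guarantees an enclosing range sees the already-replaced form of any nested range.
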
@@ -174,6 +381,13 @@ pub enum AttrTokenTree {
     AttrsTarget(AttrsTarget),
 }
 
+/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
+/// information about the tokens for attribute targets. This is used
+/// during expansion to perform early cfg-expansion, and to process attributes
+/// during proc-macro invocations.
+#[derive(Clone, Debug, Default, Encodable, Decodable)]
+pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+
 impl AttrTokenStream {
     pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
         AttrTokenStream(Lrc::new(tokens))
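
Aside: `make_attr_token_stream` above rebuilds nesting from a flat sequence with a frame stack: push a fresh frame at each open delimiter, pop and wrap the finished frame at each close delimiter. A stripped-down sketch of the technique using parenthesized `char`s instead of real tokens; `Tree`, `Frame`, and `build` are hypothetical:

    use std::mem;

    #[derive(Debug, PartialEq)]
    enum Tree {
        Leaf(char),
        Delimited(Vec<Tree>),
    }

    fn build(flat: &str) -> Vec<Tree> {
        struct Frame {
            inner: Vec<Tree>,
        }
        // The stack always has at least one frame; keep the top separate.
        let mut top = Frame { inner: vec![] };
        let mut rest: Vec<Frame> = vec![];
        for c in flat.chars() {
            match c {
                // Open delimiter: start a new innermost frame.
                '(' => rest.push(mem::replace(&mut top, Frame { inner: vec![] })),
                // Close delimiter: finish the frame and nest it in its parent.
                ')' => {
                    let done = mem::replace(&mut top, rest.pop().unwrap());
                    top.inner.push(Tree::Delimited(done.inner));
                }
                c => top.inner.push(Tree::Leaf(c)),
            }
        }
        top.inner
    }

    fn main() {
        assert_eq!(
            build("a(bc)d"),
            vec![
                Tree::Leaf('a'),
                Tree::Delimited(vec![Tree::Leaf('b'), Tree::Leaf('c')]),
                Tree::Leaf('d'),
            ]
        );
    }
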
@@ -720,6 +934,75 @@ impl TokenTreeCursor {
     }
 }
 
+/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
+/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
+/// use this type to emit them as a linear sequence. But a linear sequence is
+/// what the parser expects, for the most part.
+#[derive(Clone, Debug)]
+pub struct TokenCursor {
+    // Cursor for the current (innermost) token stream. The delimiters for this
+    // token stream are found in `self.stack.last()`; when that is `None` then
+    // we are in the outermost token stream which never has delimiters.
+    pub tree_cursor: TokenTreeCursor,
+
+    // Token streams surrounding the current one. The delimiters for stack[n]'s
+    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
+    // because it's the outermost token stream which never has delimiters.
+    pub stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
+}
+
+impl TokenCursor {
+    pub fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
+    }
+
+    /// This always-inlined version should only be used on hot code paths.
+    #[inline(always)]
+    pub fn inlined_next(&mut self) -> (Token, Spacing) {
+        loop {
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
+            if let Some(tree) = self.tree_cursor.next_ref() {
+                match tree {
+                    &TokenTree::Token(ref token, spacing) => {
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
+                    }
+                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
+                        let trees = tts.clone().into_trees();
+                        self.stack.push((
+                            mem::replace(&mut self.tree_cursor, trees),
+                            sp,
+                            spacing,
+                            delim,
+                        ));
+                        if delim != Delimiter::Invisible {
+                            return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
+            } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
+                // We have exhausted this token stream. Move back to its parent token stream.
+                self.tree_cursor = tree_cursor;
+                if delim != Delimiter::Invisible {
+                    return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
+                }
+                // No close delimiter to return; continue on to the next iteration.
+            } else {
+                // We have exhausted the outermost token stream. The use of
+                // `Spacing::Alone` is arbitrary and immaterial, because the
+                // `Eof` token's spacing is never used.
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
 pub struct DelimSpan {
     pub open: Span,
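
Aside: `TokenCursor::inlined_next` above is the inverse of the frame-stack builder: it flattens a tree back into a linear token sequence, synthesizing open and close delimiter tokens as it descends into and climbs out of each `Delimited` node. A self-contained model of that traversal, where `Tree` and `Cursor` are simplified stand-ins and invisible-delimiter handling is omitted:

    use std::mem;

    #[derive(Debug)]
    enum Tree {
        Leaf(char),
        Delimited(Vec<Tree>),
    }

    struct Cursor {
        trees: std::vec::IntoIter<Tree>,
        stack: Vec<std::vec::IntoIter<Tree>>,
    }

    impl Cursor {
        fn new(trees: Vec<Tree>) -> Cursor {
            Cursor { trees: trees.into_iter(), stack: vec![] }
        }

        fn next(&mut self) -> Option<char> {
            if let Some(tree) = self.trees.next() {
                match tree {
                    Tree::Leaf(c) => Some(c),
                    Tree::Delimited(inner) => {
                        // Descend; emit a synthesized open delimiter.
                        self.stack.push(mem::replace(&mut self.trees, inner.into_iter()));
                        Some('(')
                    }
                }
            } else if let Some(parent) = self.stack.pop() {
                // Ascend; emit a synthesized close delimiter.
                self.trees = parent;
                Some(')')
            } else {
                None // exhausted the outermost stream (the parser's Eof)
            }
        }
    }

    fn main() {
        let tree = vec![Tree::Leaf('a'), Tree::Delimited(vec![Tree::Leaf('b')]), Tree::Leaf('c')];
        let mut cursor = Cursor::new(tree);
        let mut out = String::new();
        while let Some(c) = cursor.next() {
            out.push(c);
        }
        assert_eq!(out, "a(b)c");
    }
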
@@ -765,6 +1048,7 @@ mod size_asserts {
     static_assert_size!(AttrTokenStream, 8);
     static_assert_size!(AttrTokenTree, 32);
     static_assert_size!(LazyAttrTokenStream, 8);
+    static_assert_size!(LazyAttrTokenStreamInner, 96);
     static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
     static_assert_size!(TokenStream, 8);
     static_assert_size!(TokenTree, 32);
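
Aside: these asserts pin the byte size of each type at compile time (rustc compiles this `size_asserts` module only on 64-bit targets, since the sizes are pointer-width dependent). A standalone macro with the same shape, assuming the real `static_assert_size!`, defined elsewhere in the compiler, expands to an array-length mismatch like this:

    // If the sizes disagree, the two array types have different lengths and
    // the `const` item fails to compile.
    macro_rules! static_assert_size {
        ($ty:ty, $size:expr) => {
            const _: [(); $size] = [(); std::mem::size_of::<$ty>()];
        };
    }

    static_assert_size!(u64, 8);
    static_assert_size!(Option<&u32>, 8); // niche optimization keeps this pointer-sized (64-bit)

    fn main() {}
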