@@ -4,10 +4,9 @@ use crate::ast::{self, CrateConfig, NodeId};
44use crate :: early_buffered_lints:: { BufferedEarlyLint , BufferedEarlyLintId } ;
55use crate :: source_map:: { SourceMap , FilePathMapping } ;
66use crate :: feature_gate:: UnstableFeatures ;
7- use crate :: parse:: parser:: Parser ;
8- use crate :: parse:: parser:: emit_unclosed_delims;
9- use crate :: parse:: token:: TokenKind ;
10- use crate :: tokenstream:: { TokenStream , TokenTree } ;
7+ use crate :: parse:: parser:: { Parser , emit_unclosed_delims} ;
8+ use crate :: parse:: token:: { Nonterminal , TokenKind } ;
9+ use crate :: tokenstream:: { self , TokenStream , TokenTree } ;
1110use crate :: print:: pprust;
1211use crate :: symbol:: Symbol ;
1312
@@ -24,6 +23,8 @@ use std::borrow::Cow;
2423use std:: path:: { Path , PathBuf } ;
2524use std:: str;
2625
26+ use log:: info;
27+
2728#[ cfg( test) ]
2829mod tests;
2930
@@ -407,3 +408,132 @@ impl SeqSep {
407408 }
408409 }
409410}
411+
412+ // NOTE(Centril): The following probably shouldn't be here, but it acknowledges the
413+ // fact that, architecturally, converting a nonterminal back into tokens relies on
414+ // the parser (read on below to understand why).
414+
415+ pub fn nt_to_tokenstream ( nt : & Nonterminal , sess : & ParseSess , span : Span ) -> TokenStream {
416+ // A `Nonterminal` is often a parsed AST item. At this point we now
417+ // need to convert the parsed AST to an actual token stream, e.g.
418+ // un-parse it basically.
419+ //
420+ // Unfortunately there's not really a great way to do that in a
421+ // guaranteed lossless fashion right now. The fallback here is to just
422+ // stringify the AST node and reparse it, but this loses all span
423+ // information.
424+ //
425+ // As a result, some AST nodes are annotated with the token stream they
426+ // came from. Here we attempt to extract these lossless token streams
427+ // before we fall back to the stringification.
428+ let tokens = match * nt {
429+ Nonterminal :: NtItem ( ref item) => {
430+ prepend_attrs ( sess, & item. attrs , item. tokens . as_ref ( ) , span)
431+ }
432+ Nonterminal :: NtTraitItem ( ref item) => {
433+ prepend_attrs ( sess, & item. attrs , item. tokens . as_ref ( ) , span)
434+ }
435+ Nonterminal :: NtImplItem ( ref item) => {
436+ prepend_attrs ( sess, & item. attrs , item. tokens . as_ref ( ) , span)
437+ }
438+ Nonterminal :: NtIdent ( ident, is_raw) => {
439+ Some ( tokenstream:: TokenTree :: token ( token:: Ident ( ident. name , is_raw) , ident. span ) . into ( ) )
440+ }
441+ Nonterminal :: NtLifetime ( ident) => {
442+ Some ( tokenstream:: TokenTree :: token ( token:: Lifetime ( ident. name ) , ident. span ) . into ( ) )
443+ }
444+ Nonterminal :: NtTT ( ref tt) => {
445+ Some ( tt. clone ( ) . into ( ) )
446+ }
447+ _ => None ,
448+ } ;
449+
450+ // FIXME(#43081): Avoid this pretty-print + reparse hack
451+ let source = pprust:: nonterminal_to_string ( nt) ;
452+ let filename = FileName :: macro_expansion_source_code ( & source) ;
453+ let tokens_for_real = parse_stream_from_source_str ( filename, source, sess, Some ( span) ) ;
454+
455+ // During early phases of the compiler the AST could get modified
456+ // directly (e.g., attributes added or removed) and the internal cache
457+ // of tokens my not be invalidated or updated. Consequently if the
458+ // "lossless" token stream disagrees with our actual stringification
459+ // (which has historically been much more battle-tested) then we go
460+ // with the lossy stream anyway (losing span information).
461+ //
462+ // Note that the comparison isn't `==` here to avoid comparing spans,
463+ // but it *also* is a "probable" equality which is a pretty weird
464+ // definition. We mostly want to catch actual changes to the AST
465+ // like a `#[cfg]` being processed or some weird `macro_rules!`
466+ // expansion.
467+ //
468+ // What we *don't* want to catch is the fact that a user-defined
469+ // literal like `0xf` is stringified as `15`, causing the cached token
470+ // stream to not be literal `==` token-wise (ignoring spans) to the
471+ // token stream we got from stringification.
472+ //
473+ // Instead the "probably equal" check here is "does each token
474+ // recursively have the same discriminant?" We basically don't look at
475+ // the token values here and assume that such fine grained token stream
476+ // modifications, including adding/removing typically non-semantic
477+ // tokens such as extra braces and commas, don't happen.
478+ if let Some ( tokens) = tokens {
479+ if tokens. probably_equal_for_proc_macro ( & tokens_for_real) {
480+ return tokens
481+ }
482+ info ! ( "cached tokens found, but they're not \" probably equal\" , \
483+ going with stringified version") ;
484+ }
485+ return tokens_for_real
486+ }
487+
488+ fn prepend_attrs (
489+ sess : & ParseSess ,
490+ attrs : & [ ast:: Attribute ] ,
491+ tokens : Option < & tokenstream:: TokenStream > ,
492+ span : syntax_pos:: Span
493+ ) -> Option < tokenstream:: TokenStream > {
494+ let tokens = tokens?;
495+ if attrs. len ( ) == 0 {
496+ return Some ( tokens. clone ( ) )
497+ }
498+ let mut builder = tokenstream:: TokenStreamBuilder :: new ( ) ;
499+ for attr in attrs {
500+ assert_eq ! ( attr. style, ast:: AttrStyle :: Outer ,
501+ "inner attributes should prevent cached tokens from existing" ) ;
502+
503+ let source = pprust:: attribute_to_string ( attr) ;
504+ let macro_filename = FileName :: macro_expansion_source_code ( & source) ;
505+ if attr. is_sugared_doc {
506+ let stream = parse_stream_from_source_str ( macro_filename, source, sess, Some ( span) ) ;
507+ builder. push ( stream) ;
508+ continue
509+ }
510+
511+ // synthesize # [ $path $tokens ] manually here
512+ let mut brackets = tokenstream:: TokenStreamBuilder :: new ( ) ;
513+
514+ // For simple paths, push the identifier directly
515+ if attr. path . segments . len ( ) == 1 && attr. path . segments [ 0 ] . args . is_none ( ) {
516+ let ident = attr. path . segments [ 0 ] . ident ;
517+ let token = token:: Ident ( ident. name , ident. as_str ( ) . starts_with ( "r#" ) ) ;
518+ brackets. push ( tokenstream:: TokenTree :: token ( token, ident. span ) ) ;
519+
520+ // ... and for more complicated paths, fall back to a reparse hack that
521+ // should eventually be removed.
522+ } else {
523+ let stream = parse_stream_from_source_str ( macro_filename, source, sess, Some ( span) ) ;
524+ brackets. push ( stream) ;
525+ }
526+
527+ brackets. push ( attr. tokens . clone ( ) ) ;
528+
529+ // The span we list here for `#` and for `[ ... ]` are both wrong in
530+ // that it encompasses more than each token, but it hopefully is "good
531+ // enough" for now at least.
532+ builder. push ( tokenstream:: TokenTree :: token ( token:: Pound , attr. span ) ) ;
533+ let delim_span = tokenstream:: DelimSpan :: from_single ( attr. span ) ;
534+ builder. push ( tokenstream:: TokenTree :: Delimited (
535+ delim_span, token:: DelimToken :: Bracket , brackets. build ( ) . into ( ) ) ) ;
536+ }
537+ builder. push ( tokens. clone ( ) ) ;
538+ Some ( builder. build ( ) )
539+ }
0 commit comments