@@ -75,7 +75,7 @@ crate use ParseResult::*;
7575
7676use crate :: mbe:: { self , SequenceRepetition , TokenTree } ;
7777
78- use rustc_ast:: token:: { self , DocComment , Nonterminal , Token } ;
78+ use rustc_ast:: token:: { self , DocComment , Nonterminal , Token , TokenKind } ;
7979use rustc_parse:: parser:: { NtOrTt , Parser } ;
8080use rustc_session:: parse:: ParseSess ;
8181use rustc_span:: symbol:: MacroRulesNormalizedIdent ;
@@ -87,17 +87,6 @@ use rustc_data_structures::sync::Lrc;
8787use rustc_span:: symbol:: Ident ;
8888use std:: borrow:: Cow ;
8989use std:: collections:: hash_map:: Entry :: { Occupied , Vacant } ;
90- use std:: mem;
91-
92- /// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
93- /// descended into.
94- #[ derive( Clone ) ]
95- struct MatcherPosFrame < ' tt > {
96- /// The "parent" matcher that we have descended from.
97- tts : & ' tt [ TokenTree ] ,
98- /// The position of the "dot" in `tt` at the time we descended.
99- idx : usize ,
100- }
10190
10291// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
10392// vectors longer than one frequently have many elements, not just two or
@@ -108,6 +97,33 @@ type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
10897#[ cfg( all( target_arch = "x86_64" , target_pointer_width = "64" ) ) ]
10998rustc_data_structures:: static_assert_size!( NamedMatchVec , 48 ) ;
11099
100+ #[ derive( Clone ) ]
101+ enum MatcherKind < ' tt > {
102+ TopLevel ,
103+ Delimited ( Box < DelimitedSubmatcher < ' tt > > ) ,
104+ Sequence ( Box < SequenceSubmatcher < ' tt > > ) ,
105+ }
106+
107+ #[ derive( Clone ) ]
108+ struct DelimitedSubmatcher < ' tt > {
109+ parent : Parent < ' tt > ,
110+ }
111+
112+ #[ derive( Clone ) ]
113+ struct SequenceSubmatcher < ' tt > {
114+ parent : Parent < ' tt > ,
115+ seq : & ' tt SequenceRepetition ,
116+ }
117+
118+ /// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
119+ /// `MatcherPos`.
120+ #[ derive( Clone ) ]
121+ struct Parent < ' tt > {
122+ tts : & ' tt [ TokenTree ] ,
123+ idx : usize ,
124+ kind : MatcherKind < ' tt > ,
125+ }
126+
111127/// A single matcher position, which could be within the top-level matcher, a submatcher, a
112128/// subsubmatcher, etc. For example:
113129/// ```text
@@ -116,13 +132,14 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
116132/// <--------------> first submatcher; three tts, zero metavars
117133/// <--------------------------> top-level matcher; two tts, one metavar
118134/// ```
119- #[ derive( Clone ) ]
120135struct MatcherPos < ' tt > {
121136 /// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
122137 /// submatcher, this is just the contents of that submatcher.
123138 tts : & ' tt [ TokenTree ] ,
124139
125- /// The "dot" position within the current submatcher, i.e. the index into `tts`.
140+ /// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or
141+ /// two positions past the final elements in `tts` when dealing with sequences, see
142+ /// `parse_tt_inner` for details.
126143 idx : usize ,
127144
128145 /// This vector ends up with one element per metavar in the *top-level* matcher, even when this
@@ -134,25 +151,18 @@ struct MatcherPos<'tt> {
134151 /// The number of sequences this mp is within.
135152 seq_depth : usize ,
136153
137- /// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are
138- /// no metavars.
139- match_lo : usize ,
140-
141154 /// The position in `matches` of the next metavar to be matched against the source token
142155 /// stream. Should not be used if there are no metavars.
143156 match_cur : usize ,
144157
145- /// This field is only used if we are matching a sequence.
146- sequence : Option < MatcherPosSequence < ' tt > > ,
147-
148- /// When we are within a `Delimited` submatcher (or subsubmatcher), this tracks the parent
149- /// matcher(s). The bottom of the stack is the top-level matcher.
150- stack : SmallVec < [ MatcherPosFrame < ' tt > ; 1 ] > ,
158+ /// What kind of matcher we are in. For submatchers, this contains enough information to
159+ /// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
160+ kind : MatcherKind < ' tt > ,
151161}
152162
153163// This type is used a lot. Make sure it doesn't unintentionally get bigger.
154164#[ cfg( all( target_arch = "x86_64" , target_pointer_width = "64" ) ) ]
155- rustc_data_structures:: static_assert_size!( MatcherPos <' _>, 104 ) ;
165+ rustc_data_structures:: static_assert_size!( MatcherPos <' _>, 64 ) ;
156166
157167impl < ' tt > MatcherPos < ' tt > {
158168 fn top_level ( matcher : & ' tt [ TokenTree ] , empty_matches : Lrc < NamedMatchVec > ) -> Self {
@@ -161,31 +171,50 @@ impl<'tt> MatcherPos<'tt> {
161171 idx : 0 ,
162172 matches : empty_matches,
163173 seq_depth : 0 ,
164- match_lo : 0 ,
165174 match_cur : 0 ,
166- stack : smallvec ! [ ] ,
167- sequence : None ,
175+ kind : MatcherKind :: TopLevel ,
168176 }
169177 }
170178
179+ fn empty_sequence (
180+ parent_mp : & MatcherPos < ' tt > ,
181+ seq : & ' tt SequenceRepetition ,
182+ empty_matches : Lrc < NamedMatchVec > ,
183+ ) -> Self {
184+ let mut mp = MatcherPos {
185+ tts : parent_mp. tts ,
186+ idx : parent_mp. idx + 1 ,
187+ matches : parent_mp. matches . clone ( ) , // a cheap clone
188+ seq_depth : parent_mp. seq_depth ,
189+ match_cur : parent_mp. match_cur + seq. num_captures ,
190+ kind : parent_mp. kind . clone ( ) , // an expensive clone
191+ } ;
192+ for idx in parent_mp. match_cur ..parent_mp. match_cur + seq. num_captures {
193+ mp. push_match ( idx, MatchedSeq ( empty_matches. clone ( ) ) ) ;
194+ }
195+ mp
196+ }
197+
171198 fn sequence (
172- parent : Box < MatcherPos < ' tt > > ,
199+ parent_mp : Box < MatcherPos < ' tt > > ,
173200 seq : & ' tt SequenceRepetition ,
174201 empty_matches : Lrc < NamedMatchVec > ,
175202 ) -> Self {
203+ let seq_kind = box SequenceSubmatcher {
204+ parent : Parent { tts : parent_mp. tts , idx : parent_mp. idx , kind : parent_mp. kind } ,
205+ seq,
206+ } ;
176207 let mut mp = MatcherPos {
177208 tts : & seq. tts ,
178209 idx : 0 ,
179- matches : parent. matches . clone ( ) ,
180- seq_depth : parent. seq_depth ,
181- match_lo : parent. match_cur ,
182- match_cur : parent. match_cur ,
183- sequence : Some ( MatcherPosSequence { parent, seq } ) ,
184- stack : smallvec ! [ ] ,
210+ matches : parent_mp. matches ,
211+ seq_depth : parent_mp. seq_depth ,
212+ match_cur : parent_mp. match_cur ,
213+ kind : MatcherKind :: Sequence ( seq_kind) ,
185214 } ;
186215 // Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
187216 // must have the parent's depth at this point for these `push_match` calls to work.
188- for idx in mp. match_lo ..mp. match_lo + seq. num_captures {
217+ for idx in mp. match_cur ..mp. match_cur + seq. num_captures {
189218 mp. push_match ( idx, MatchedSeq ( empty_matches. clone ( ) ) ) ;
190219 }
191220 mp. seq_depth += 1 ;
@@ -226,16 +255,6 @@ impl<'tt> MatcherPos<'tt> {
226255 }
227256}
228257
229- #[ derive( Clone ) ]
230- struct MatcherPosSequence < ' tt > {
231- /// The parent matcher position. Effectively gives a linked list of matches all the way to the
232- /// top-level matcher.
233- parent : Box < MatcherPos < ' tt > > ,
234-
235- /// The sequence itself.
236- seq : & ' tt SequenceRepetition ,
237- }
238-
239258enum EofMatcherPositions < ' tt > {
240259 None ,
241260 One ( Box < MatcherPos < ' tt > > ) ,
@@ -448,18 +467,6 @@ impl<'tt> TtParser<'tt> {
448467 let mut eof_mps = EofMatcherPositions :: None ;
449468
450469 while let Some ( mut mp) = self . cur_mps . pop ( ) {
451- // Backtrack out of delimited submatcher when necessary. When backtracking out again,
452- // we need to advance the "dot" past the delimiters in the parent matcher(s).
453- while mp. idx >= mp. tts . len ( ) {
454- match mp. stack . pop ( ) {
455- Some ( MatcherPosFrame { tts, idx } ) => {
456- mp. tts = tts;
457- mp. idx = idx + 1 ;
458- }
459- None => break ,
460- }
461- }
462-
463470 // Get the current position of the "dot" (`idx`) in `mp` and the number of token
464471 // trees in the matcher (`len`).
465472 let idx = mp. idx ;
@@ -473,13 +480,11 @@ impl<'tt> TtParser<'tt> {
473480 let op = seq. kleene . op ;
474481 if op == mbe:: KleeneOp :: ZeroOrMore || op == mbe:: KleeneOp :: ZeroOrOne {
475482 // Allow for the possibility of zero matches of this sequence.
476- let mut new_mp = mp. clone ( ) ;
477- new_mp. match_cur += seq. num_captures ;
478- new_mp. idx += 1 ;
479- for idx in mp. match_cur ..mp. match_cur + seq. num_captures {
480- new_mp. push_match ( idx, MatchedSeq ( self . empty_matches . clone ( ) ) ) ;
481- }
482- self . cur_mps . push ( new_mp) ;
483+ self . cur_mps . push ( box MatcherPos :: empty_sequence (
484+ & * mp,
485+ & seq,
486+ self . empty_matches . clone ( ) ,
487+ ) ) ;
483488 }
484489
485490 // Allow for the possibility of one or more matches of this sequence.
@@ -509,16 +514,17 @@ impl<'tt> TtParser<'tt> {
509514 }
510515
511516 TokenTree :: Delimited ( _, delimited) => {
512- // To descend into a delimited submatcher, we push the current matcher onto
513- // a stack and push a new mp containing the submatcher onto `cur_mps`.
514- //
515- // At the beginning of the loop, if we reach the end of the delimited
516- // submatcher, we pop the stack to backtrack out of the descent. Note that
517- // we use `all_tts` to include the open and close delimiter tokens.
518- let tts = mem :: replace ( & mut mp. tts , & delimited . all_tts ) ;
519- let idx = mp . idx ;
520- mp. stack . push ( MatcherPosFrame { tts, idx } ) ;
517+ // To descend into a delimited submatcher, we update `mp` appropriately,
518+ // including enough information to re-ascend afterwards, and push it onto
519+ // `cur_mps`. Later, when we reach the closing delimiter, we will recover
520+ // the parent matcher position to ascend. Note that we use `all_tts` to
521+ // include the open and close delimiter tokens.
522+ let kind = MatcherKind :: Delimited ( box DelimitedSubmatcher {
523+ parent : Parent { tts : mp. tts , idx : mp . idx , kind : mp . kind } ,
524+ } ) ;
525+ mp. tts = & delimited . all_tts ;
521526 mp. idx = 0 ;
527+ mp. kind = kind;
522528 self . cur_mps . push ( mp) ;
523529 }
524530
@@ -536,6 +542,18 @@ impl<'tt> TtParser<'tt> {
536542 mp. idx += 1 ;
537543 self . cur_mps . push ( mp) ;
538544 } else if token_name_eq ( & t, token) {
545+ if let TokenKind :: CloseDelim ( _) = token. kind {
546+ // Ascend out of the delimited submatcher.
547+ debug_assert_eq ! ( idx, len - 1 ) ;
548+ match mp. kind {
549+ MatcherKind :: Delimited ( submatcher) => {
550+ mp. tts = submatcher. parent . tts ;
551+ mp. idx = submatcher. parent . idx ;
552+ mp. kind = submatcher. parent . kind ;
553+ }
554+ _ => unreachable ! ( ) ,
555+ }
556+ }
539557 mp. idx += 1 ;
540558 self . next_mps . push ( mp) ;
541559 }
@@ -544,38 +562,44 @@ impl<'tt> TtParser<'tt> {
544562 // These cannot appear in a matcher.
545563 TokenTree :: MetaVar ( ..) | TokenTree :: MetaVarExpr ( ..) => unreachable ! ( ) ,
546564 }
547- } else if let Some ( sequence ) = & mp. sequence {
565+ } else if let MatcherKind :: Sequence ( box SequenceSubmatcher { parent , seq } ) = & mp. kind {
548566 // We are past the end of a sequence.
549- debug_assert ! ( idx <= len + 1 ) ;
567+ // - If it has no separator, we must be only one past the end.
568+ // - If it has a separator, we may be one past the end, in which case we must
569+ // look for a separator. Or we may be two past the end, in which case we have
570+ // already dealt with the separator.
571+ debug_assert ! ( idx == len || idx == len + 1 && seq. separator. is_some( ) ) ;
550572
551573 if idx == len {
552- // Add all matches from the sequence to `parent`, and move the "dot" past the
553- // sequence in `parent`. This allows for the case where the sequence matching
554- // is finished.
555- let mut new_mp = sequence. parent . clone ( ) ;
556- new_mp. matches = mp. matches . clone ( ) ;
557- new_mp. match_cur = mp. match_lo + sequence. seq . num_captures ;
558- new_mp. idx += 1 ;
574+ // Sequence matching may have finished: move the "dot" past the sequence in
575+ // `parent`. This applies whether a separator is used or not. If sequence
576+ // matching hasn't finished, this `new_mp` will fail quietly when it is
577+ // processed next time around the loop.
578+ let new_mp = box MatcherPos {
579+ tts : parent. tts ,
580+ idx : parent. idx + 1 ,
581+ matches : mp. matches . clone ( ) , // a cheap clone
582+ seq_depth : mp. seq_depth - 1 ,
583+ match_cur : mp. match_cur ,
584+ kind : parent. kind . clone ( ) , // an expensive clone
585+ } ;
559586 self . cur_mps . push ( new_mp) ;
560587 }
561588
562- if idx == len && sequence. seq . separator . is_some ( ) {
563- if sequence
564- . seq
565- . separator
566- . as_ref ( )
567- . map_or ( false , |sep| token_name_eq ( token, sep) )
568- {
589+ if seq. separator . is_some ( ) && idx == len {
590+ // Look for the separator.
591+ if seq. separator . as_ref ( ) . map_or ( false , |sep| token_name_eq ( token, sep) ) {
569592 // The matcher has a separator, and it matches the current token. We can
570593 // advance past the separator token.
571594 mp. idx += 1 ;
572595 self . next_mps . push ( mp) ;
573596 }
574- } else if sequence. seq . kleene . op != mbe:: KleeneOp :: ZeroOrOne {
575- // We don't need a separator. Move the "dot" back to the beginning of the
576- // matcher and try to match again UNLESS we are only allowed to have _one_
577- // repetition.
578- mp. match_cur = mp. match_lo ;
597+ } else if seq. kleene . op != mbe:: KleeneOp :: ZeroOrOne {
598+ // We don't need to look for a separator: either this sequence doesn't have
599+ // one, or it does and we've already handled it. Also, we are allowed to have
600+ // more than one repetition. Move the "dot" back to the beginning of the
601+ // matcher and try to match again.
602+ mp. match_cur -= seq. num_captures ;
579603 mp. idx = 0 ;
580604 self . cur_mps . push ( mp) ;
581605 }
0 commit comments