@@ -75,7 +75,7 @@ crate use ParseResult::*;
75
75
76
76
use crate :: mbe:: { self , SequenceRepetition , TokenTree } ;
77
77
78
- use rustc_ast:: token:: { self , DocComment , Nonterminal , Token } ;
78
+ use rustc_ast:: token:: { self , DocComment , Nonterminal , Token , TokenKind } ;
79
79
use rustc_parse:: parser:: { NtOrTt , Parser } ;
80
80
use rustc_session:: parse:: ParseSess ;
81
81
use rustc_span:: symbol:: MacroRulesNormalizedIdent ;
@@ -87,17 +87,6 @@ use rustc_data_structures::sync::Lrc;
87
87
use rustc_span:: symbol:: Ident ;
88
88
use std:: borrow:: Cow ;
89
89
use std:: collections:: hash_map:: Entry :: { Occupied , Vacant } ;
90
- use std:: mem;
91
-
92
- /// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
93
- /// descended into.
94
- #[ derive( Clone ) ]
95
- struct MatcherPosFrame < ' tt > {
96
- /// The "parent" matcher that we have descended from.
97
- tts : & ' tt [ TokenTree ] ,
98
- /// The position of the "dot" in `tt` at the time we descended.
99
- idx : usize ,
100
- }
101
90
102
91
// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
103
92
// vectors longer than one frequently have many elements, not just two or
@@ -108,6 +97,33 @@ type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
108
97
#[ cfg( all( target_arch = "x86_64" , target_pointer_width = "64" ) ) ]
109
98
rustc_data_structures:: static_assert_size!( NamedMatchVec , 48 ) ;
110
99
100
+ #[ derive( Clone ) ] // `MatcherKind` records which kind of matcher a `MatcherPos` is currently inside.
101
+ enum MatcherKind < ' tt > {
102
+ TopLevel , // The outermost matcher; carries no parent data.
103
+ Delimited ( Box < DelimitedSubmatcher < ' tt > > ) , // Inside a delimited submatcher; the box holds the parent state to return to.
104
+ Sequence ( Box < SequenceSubmatcher < ' tt > > ) , // Inside a sequence submatcher; the box holds the parent state and the sequence.
105
+ }
106
+
107
+ #[ derive( Clone ) ] // Payload of `MatcherKind::Delimited`.
108
+ struct DelimitedSubmatcher < ' tt > {
109
+ parent : Parent < ' tt > , // Parent matcher state, restored when the closing delimiter is matched.
110
+ }
111
+
112
+ #[ derive( Clone ) ] // Payload of `MatcherKind::Sequence`.
113
+ struct SequenceSubmatcher < ' tt > {
114
+ parent : Parent < ' tt > , // Parent matcher state, used to build the position that follows the sequence.
115
+ seq : & ' tt SequenceRepetition , // The sequence being matched; its separator, Kleene op and capture count are consulted.
116
+ }
117
+
118
+ /// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
119
+ /// `MatcherPos`.
120
+ #[ derive( Clone ) ]
121
+ struct Parent < ' tt > {
122
+ tts : & ' tt [ TokenTree ] , // The token trees of the parent matcher.
123
+ idx : usize , // The dot position in the parent matcher when the submatcher was entered.
124
+ kind : MatcherKind < ' tt > , // The kind of the parent matcher itself, so nested submatchers can ascend level by level.
125
+ }
126
+
111
127
/// A single matcher position, which could be within the top-level matcher, a submatcher, a
112
128
/// subsubmatcher, etc. For example:
113
129
/// ```text
@@ -116,13 +132,14 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
116
132
/// <--------------> first submatcher; three tts, zero metavars
117
133
/// <--------------------------> top-level matcher; two tts, one metavar
118
134
/// ```
119
- #[ derive( Clone ) ]
120
135
struct MatcherPos < ' tt > {
121
136
/// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
122
137
/// submatcher, this is just the contents of that submatcher.
123
138
tts : & ' tt [ TokenTree ] ,
124
139
125
- /// The "dot" position within the current submatcher, i.e. the index into `tts`.
140
+ /// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or
141
+ /// two positions past the final elements in `tts` when dealing with sequences, see
142
+ /// `parse_tt_inner` for details.
126
143
idx : usize ,
127
144
128
145
/// This vector ends up with one element per metavar in the *top-level* matcher, even when this
@@ -134,25 +151,18 @@ struct MatcherPos<'tt> {
134
151
/// The number of sequences this mp is within.
135
152
seq_depth : usize ,
136
153
137
- /// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are
138
- /// no metavars.
139
- match_lo : usize ,
140
-
141
154
/// The position in `matches` of the next metavar to be matched against the source token
142
155
/// stream. Should not be used if there are no metavars.
143
156
match_cur : usize ,
144
157
145
- /// This field is only used if we are matching a sequence.
146
- sequence : Option < MatcherPosSequence < ' tt > > ,
147
-
148
- /// When we are within a `Delimited` submatcher (or subsubmatcher), this tracks the parent
149
- /// matcher(s). The bottom of the stack is the top-level matcher.
150
- stack : SmallVec < [ MatcherPosFrame < ' tt > ; 1 ] > ,
158
+ /// What kind of matcher we are in. For submatchers, this contains enough information to
159
+ /// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
160
+ kind : MatcherKind < ' tt > ,
151
161
}
152
162
153
163
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
154
164
#[ cfg( all( target_arch = "x86_64" , target_pointer_width = "64" ) ) ]
155
- rustc_data_structures:: static_assert_size!( MatcherPos <' _>, 104 ) ;
165
+ rustc_data_structures:: static_assert_size!( MatcherPos <' _>, 64 ) ;
156
166
157
167
impl < ' tt > MatcherPos < ' tt > {
158
168
fn top_level ( matcher : & ' tt [ TokenTree ] , empty_matches : Lrc < NamedMatchVec > ) -> Self {
@@ -161,31 +171,50 @@ impl<'tt> MatcherPos<'tt> {
161
171
idx : 0 ,
162
172
matches : empty_matches,
163
173
seq_depth : 0 ,
164
- match_lo : 0 ,
165
174
match_cur : 0 ,
166
- stack : smallvec ! [ ] ,
167
- sequence : None ,
175
+ kind : MatcherKind :: TopLevel ,
168
176
}
169
177
}
170
178
179
+ fn empty_sequence (
180
+ parent_mp : & MatcherPos < ' tt > ,
181
+ seq : & ' tt SequenceRepetition ,
182
+ empty_matches : Lrc < NamedMatchVec > ,
183
+ ) -> Self { // Builds the position reached when `seq` matches zero times: same matcher as `parent_mp`, dot moved past the sequence.
184
+ let mut mp = MatcherPos {
185
+ tts : parent_mp. tts , // stay in the same matcher as the parent position
186
+ idx : parent_mp. idx + 1 , // step the dot past the sequence
187
+ matches : parent_mp. matches . clone ( ) , // a cheap clone
188
+ seq_depth : parent_mp. seq_depth , // unchanged: the sequence is never entered
189
+ match_cur : parent_mp. match_cur + seq. num_captures , // skip the metavars the sequence would have bound
190
+ kind : parent_mp. kind . clone ( ) , // an expensive clone
191
+ } ;
192
+ for idx in parent_mp. match_cur ..parent_mp. match_cur + seq. num_captures {
193
+ mp. push_match ( idx, MatchedSeq ( empty_matches. clone ( ) ) ) ; // each skipped metavar gets an empty `MatchedSeq`
194
+ }
195
+ mp
196
+ }
197
+
171
198
fn sequence (
172
- parent : Box < MatcherPos < ' tt > > ,
199
+ parent_mp : Box < MatcherPos < ' tt > > ,
173
200
seq : & ' tt SequenceRepetition ,
174
201
empty_matches : Lrc < NamedMatchVec > ,
175
202
) -> Self {
203
+ let seq_kind = box SequenceSubmatcher {
204
+ parent : Parent { tts : parent_mp. tts , idx : parent_mp. idx , kind : parent_mp. kind } ,
205
+ seq,
206
+ } ;
176
207
let mut mp = MatcherPos {
177
208
tts : & seq. tts ,
178
209
idx : 0 ,
179
- matches : parent. matches . clone ( ) ,
180
- seq_depth : parent. seq_depth ,
181
- match_lo : parent. match_cur ,
182
- match_cur : parent. match_cur ,
183
- sequence : Some ( MatcherPosSequence { parent, seq } ) ,
184
- stack : smallvec ! [ ] ,
210
+ matches : parent_mp. matches ,
211
+ seq_depth : parent_mp. seq_depth ,
212
+ match_cur : parent_mp. match_cur ,
213
+ kind : MatcherKind :: Sequence ( seq_kind) ,
185
214
} ;
186
215
// Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
187
216
// must have the parent's depth at this point for these `push_match` calls to work.
188
- for idx in mp. match_lo ..mp. match_lo + seq. num_captures {
217
+ for idx in mp. match_cur ..mp. match_cur + seq. num_captures {
189
218
mp. push_match ( idx, MatchedSeq ( empty_matches. clone ( ) ) ) ;
190
219
}
191
220
mp. seq_depth += 1 ;
@@ -226,16 +255,6 @@ impl<'tt> MatcherPos<'tt> {
226
255
}
227
256
}
228
257
229
- #[ derive( Clone ) ]
230
- struct MatcherPosSequence < ' tt > {
231
- /// The parent matcher position. Effectively gives a linked list of matches all the way to the
232
- /// top-level matcher.
233
- parent : Box < MatcherPos < ' tt > > ,
234
-
235
- /// The sequence itself.
236
- seq : & ' tt SequenceRepetition ,
237
- }
238
-
239
258
enum EofMatcherPositions < ' tt > {
240
259
None ,
241
260
One ( Box < MatcherPos < ' tt > > ) ,
@@ -448,18 +467,6 @@ impl<'tt> TtParser<'tt> {
448
467
let mut eof_mps = EofMatcherPositions :: None ;
449
468
450
469
while let Some ( mut mp) = self . cur_mps . pop ( ) {
451
- // Backtrack out of delimited submatcher when necessary. When backtracking out again,
452
- // we need to advance the "dot" past the delimiters in the parent matcher(s).
453
- while mp. idx >= mp. tts . len ( ) {
454
- match mp. stack . pop ( ) {
455
- Some ( MatcherPosFrame { tts, idx } ) => {
456
- mp. tts = tts;
457
- mp. idx = idx + 1 ;
458
- }
459
- None => break ,
460
- }
461
- }
462
-
463
470
// Get the current position of the "dot" (`idx`) in `mp` and the number of token
464
471
// trees in the matcher (`len`).
465
472
let idx = mp. idx ;
@@ -473,13 +480,11 @@ impl<'tt> TtParser<'tt> {
473
480
let op = seq. kleene . op ;
474
481
if op == mbe:: KleeneOp :: ZeroOrMore || op == mbe:: KleeneOp :: ZeroOrOne {
475
482
// Allow for the possibility of zero matches of this sequence.
476
- let mut new_mp = mp. clone ( ) ;
477
- new_mp. match_cur += seq. num_captures ;
478
- new_mp. idx += 1 ;
479
- for idx in mp. match_cur ..mp. match_cur + seq. num_captures {
480
- new_mp. push_match ( idx, MatchedSeq ( self . empty_matches . clone ( ) ) ) ;
481
- }
482
- self . cur_mps . push ( new_mp) ;
483
+ self . cur_mps . push ( box MatcherPos :: empty_sequence (
484
+ & * mp,
485
+ & seq,
486
+ self . empty_matches . clone ( ) ,
487
+ ) ) ;
483
488
}
484
489
485
490
// Allow for the possibility of one or more matches of this sequence.
@@ -509,16 +514,17 @@ impl<'tt> TtParser<'tt> {
509
514
}
510
515
511
516
TokenTree :: Delimited ( _, delimited) => {
512
- // To descend into a delimited submatcher, we push the current matcher onto
513
- // a stack and push a new mp containing the submatcher onto `cur_mps`.
514
- //
515
- // At the beginning of the loop, if we reach the end of the delimited
516
- // submatcher, we pop the stack to backtrack out of the descent. Note that
517
- // we use `all_tts` to include the open and close delimiter tokens.
518
- let tts = mem :: replace ( & mut mp. tts , & delimited . all_tts ) ;
519
- let idx = mp . idx ;
520
- mp. stack . push ( MatcherPosFrame { tts, idx } ) ;
517
+ // To descend into a delimited submatcher, we update `mp` appropriately,
518
+ // including enough information to re-ascend afterwards, and push it onto
519
+ // `cur_mps`. Later, when we reach the closing delimiter, we will recover
520
+ // the parent matcher position to ascend. Note that we use `all_tts` to
521
+ // include the open and close delimiter tokens.
522
+ let kind = MatcherKind :: Delimited ( box DelimitedSubmatcher {
523
+ parent : Parent { tts : mp. tts , idx : mp . idx , kind : mp . kind } ,
524
+ } ) ;
525
+ mp. tts = & delimited . all_tts ;
521
526
mp. idx = 0 ;
527
+ mp. kind = kind;
522
528
self . cur_mps . push ( mp) ;
523
529
}
524
530
@@ -536,6 +542,18 @@ impl<'tt> TtParser<'tt> {
536
542
mp. idx += 1 ;
537
543
self . cur_mps . push ( mp) ;
538
544
} else if token_name_eq ( & t, token) {
545
+ if let TokenKind :: CloseDelim ( _) = token. kind {
546
+ // Ascend out of the delimited submatcher.
547
+ debug_assert_eq ! ( idx, len - 1 ) ;
548
+ match mp. kind {
549
+ MatcherKind :: Delimited ( submatcher) => {
550
+ mp. tts = submatcher. parent . tts ;
551
+ mp. idx = submatcher. parent . idx ;
552
+ mp. kind = submatcher. parent . kind ;
553
+ }
554
+ _ => unreachable ! ( ) ,
555
+ }
556
+ }
539
557
mp. idx += 1 ;
540
558
self . next_mps . push ( mp) ;
541
559
}
@@ -544,38 +562,44 @@ impl<'tt> TtParser<'tt> {
544
562
// These cannot appear in a matcher.
545
563
TokenTree :: MetaVar ( ..) | TokenTree :: MetaVarExpr ( ..) => unreachable ! ( ) ,
546
564
}
547
- } else if let Some ( sequence ) = & mp. sequence {
565
+ } else if let MatcherKind :: Sequence ( box SequenceSubmatcher { parent , seq } ) = & mp. kind {
548
566
// We are past the end of a sequence.
549
- debug_assert ! ( idx <= len + 1 ) ;
567
+ // - If it has no separator, we must be only one past the end.
568
+ // - If it has a separator, we may be one past the end, in which case we must
569
+ // look for a separator. Or we may be two past the end, in which case we have
570
+ // already dealt with the separator.
571
+ debug_assert ! ( idx == len || idx == len + 1 && seq. separator. is_some( ) ) ;
550
572
551
573
if idx == len {
552
- // Add all matches from the sequence to `parent`, and move the "dot" past the
553
- // sequence in `parent`. This allows for the case where the sequence matching
554
- // is finished.
555
- let mut new_mp = sequence. parent . clone ( ) ;
556
- new_mp. matches = mp. matches . clone ( ) ;
557
- new_mp. match_cur = mp. match_lo + sequence. seq . num_captures ;
558
- new_mp. idx += 1 ;
574
+ // Sequence matching may have finished: move the "dot" past the sequence in
575
+ // `parent`. This applies whether a separator is used or not. If sequence
576
+ // matching hasn't finished, this `new_mp` will fail quietly when it is
577
+ // processed next time around the loop.
578
+ let new_mp = box MatcherPos {
579
+ tts : parent. tts ,
580
+ idx : parent. idx + 1 ,
581
+ matches : mp. matches . clone ( ) , // a cheap clone
582
+ seq_depth : mp. seq_depth - 1 ,
583
+ match_cur : mp. match_cur ,
584
+ kind : parent. kind . clone ( ) , // an expensive clone
585
+ } ;
559
586
self . cur_mps . push ( new_mp) ;
560
587
}
561
588
562
- if idx == len && sequence. seq . separator . is_some ( ) {
563
- if sequence
564
- . seq
565
- . separator
566
- . as_ref ( )
567
- . map_or ( false , |sep| token_name_eq ( token, sep) )
568
- {
589
+ if seq. separator . is_some ( ) && idx == len {
590
+ // Look for the separator.
591
+ if seq. separator . as_ref ( ) . map_or ( false , |sep| token_name_eq ( token, sep) ) {
569
592
// The matcher has a separator, and it matches the current token. We can
570
593
// advance past the separator token.
571
594
mp. idx += 1 ;
572
595
self . next_mps . push ( mp) ;
573
596
}
574
- } else if sequence. seq . kleene . op != mbe:: KleeneOp :: ZeroOrOne {
575
- // We don't need a separator. Move the "dot" back to the beginning of the
576
- // matcher and try to match again UNLESS we are only allowed to have _one_
577
- // repetition.
578
- mp. match_cur = mp. match_lo ;
597
+ } else if seq. kleene . op != mbe:: KleeneOp :: ZeroOrOne {
598
+ // We don't need to look for a separator: either this sequence doesn't have
599
+ // one, or it does and we've already handled it. Also, we are allowed to have
600
+ // more than one repetition. Move the "dot" back to the beginning of the
601
+ // matcher and try to match again.
602
+ mp. match_cur -= seq. num_captures ;
579
603
mp. idx = 0 ;
580
604
self . cur_mps . push ( mp) ;
581
605
}
0 commit comments