Skip to content

Commit f68a044

Browse files
committed
Remove MatcherPos::stack.
`parse_tt` needs a way to get from within submatchers make to the enclosing submatchers. Currently it has two distinct mechanisms for this: - `Delimited` submatchers use `MatcherPos::stack` to record stuff about the parent (and further back ancestors). - `Sequence` submatchers use `MatcherPosSequence::parent` to point to the parent matcher position. Having two mechanisms is really confusing, and it took me a long time to understand all this. This commit eliminates `MatcherPos::stack`, and changes `Delimited` submatchers to use the same mechanism as sequence submatchers. That mechanism is also changed a bit: instead of storing the entire parent `MatcherPos`, we now only store the necessary parts from the parent `MatcherPos`. Overall this is a small performance win, with the positives outweighing the negatives, but it's mostly for clarity.
1 parent 048bd67 commit f68a044

File tree

2 files changed

+74
-62
lines changed

2 files changed

+74
-62
lines changed

compiler/rustc_expand/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#![feature(associated_type_bounds)]
22
#![feature(associated_type_defaults)]
3+
#![feature(box_patterns)]
34
#![feature(box_syntax)]
45
#![feature(crate_visibility_modifier)]
56
#![feature(decl_macro)]

compiler/rustc_expand/src/mbe/macro_parser.rs

+73-62
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,6 @@ use rustc_data_structures::sync::Lrc;
8787
use rustc_span::symbol::Ident;
8888
use std::borrow::Cow;
8989
use std::collections::hash_map::Entry::{Occupied, Vacant};
90-
use std::mem;
91-
92-
/// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
93-
/// descended into.
94-
#[derive(Clone)]
95-
struct MatcherPosFrame<'tt> {
96-
/// The "parent" matcher that we have descended from.
97-
tts: &'tt [TokenTree],
98-
/// The position of the "dot" in `tt` at the time we descended.
99-
idx: usize,
100-
}
10190

10291
// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
10392
// vectors longer than one frequently have many elements, not just two or
@@ -108,6 +97,33 @@ type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
10897
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
10998
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
11099

100+
#[derive(Clone)]
101+
enum MatcherKind<'tt> {
102+
TopLevel,
103+
Delimited(Box<DelimitedSubmatcher<'tt>>),
104+
Sequence(Box<SequenceSubmatcher<'tt>>),
105+
}
106+
107+
#[derive(Clone)]
108+
struct DelimitedSubmatcher<'tt> {
109+
parent: Parent<'tt>,
110+
}
111+
112+
#[derive(Clone)]
113+
struct SequenceSubmatcher<'tt> {
114+
parent: Parent<'tt>,
115+
seq: &'tt SequenceRepetition,
116+
}
117+
118+
/// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
119+
/// `MathcherPos`.
120+
#[derive(Clone)]
121+
struct Parent<'tt> {
122+
tts: &'tt [TokenTree],
123+
idx: usize,
124+
kind: MatcherKind<'tt>,
125+
}
126+
111127
/// A single matcher position, which could be within the top-level matcher, a submatcher, a
112128
/// subsubmatcher, etc. For example:
113129
/// ```text
@@ -140,17 +156,14 @@ struct MatcherPos<'tt> {
140156
/// stream. Should not be used if there are no metavars.
141157
match_cur: usize,
142158

143-
/// This field is only used if we are matching a sequence.
144-
sequence: Option<MatcherPosSequence<'tt>>,
145-
146-
/// When we are within a `Delimited` submatcher (or subsubmatcher), this tracks the parent
147-
/// matcher(s). The bottom of the stack is the top-level matcher.
148-
stack: SmallVec<[MatcherPosFrame<'tt>; 1]>,
159+
/// What kind of matcher we are in. For submatchers, this contains enough information to
160+
/// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
161+
kind: MatcherKind<'tt>,
149162
}
150163

151164
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
152165
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
153-
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 96);
166+
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 64);
154167

155168
impl<'tt> MatcherPos<'tt> {
156169
fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self {
@@ -160,24 +173,26 @@ impl<'tt> MatcherPos<'tt> {
160173
matches: empty_matches,
161174
seq_depth: 0,
162175
match_cur: 0,
163-
stack: smallvec![],
164-
sequence: None,
176+
kind: MatcherKind::TopLevel,
165177
}
166178
}
167179

168180
fn sequence(
169-
parent: Box<MatcherPos<'tt>>,
181+
parent_mp: Box<MatcherPos<'tt>>,
170182
seq: &'tt SequenceRepetition,
171183
empty_matches: Lrc<NamedMatchVec>,
172184
) -> Self {
185+
let seq_kind = box SequenceSubmatcher {
186+
parent: Parent { tts: parent_mp.tts, idx: parent_mp.idx, kind: parent_mp.kind },
187+
seq,
188+
};
173189
let mut mp = MatcherPos {
174190
tts: &seq.tts,
175191
idx: 0,
176-
matches: parent.matches.clone(),
177-
seq_depth: parent.seq_depth,
178-
match_cur: parent.match_cur,
179-
sequence: Some(MatcherPosSequence { parent, seq }),
180-
stack: smallvec![],
192+
matches: parent_mp.matches,
193+
seq_depth: parent_mp.seq_depth,
194+
match_cur: parent_mp.match_cur,
195+
kind: MatcherKind::Sequence(seq_kind),
181196
};
182197
// Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
183198
// must have the parent's depth at this point for these `push_match` calls to work.
@@ -222,16 +237,6 @@ impl<'tt> MatcherPos<'tt> {
222237
}
223238
}
224239

225-
#[derive(Clone)]
226-
struct MatcherPosSequence<'tt> {
227-
/// The parent matcher position. Effectively gives a linked list of matches all the way to the
228-
/// top-level matcher.
229-
parent: Box<MatcherPos<'tt>>,
230-
231-
/// The sequence itself.
232-
seq: &'tt SequenceRepetition,
233-
}
234-
235240
enum EofMatcherPositions<'tt> {
236241
None,
237242
One(Box<MatcherPos<'tt>>),
@@ -499,15 +504,17 @@ impl<'tt> TtParser<'tt> {
499504
}
500505

501506
TokenTree::Delimited(_, delimited) => {
502-
// To descend into a delimited submatcher, we push the current matcher onto
503-
// a stack and push a new mp containing the submatcher onto `cur_mps`. When
504-
// we reach the closing delimiter, we will pop the stack to backtrack out
505-
// of the descent. Note that we use `all_tts` to include the open and close
506-
// delimiter tokens.
507-
let tts = mem::replace(&mut mp.tts, &delimited.all_tts);
508-
let idx = mp.idx;
509-
mp.stack.push(MatcherPosFrame { tts, idx });
507+
// To descend into a delimited submatcher, we update `mp` appropriately,
508+
// including enough information to re-ascend afterwards, and push it onto
509+
// `cur_mps`. Later, when we reach the closing delimiter, we will recover
510+
// the parent matcher position to ascend. Note that we use `all_tts` to
511+
// include the open and close delimiter tokens.
512+
let kind = MatcherKind::Delimited(box DelimitedSubmatcher {
513+
parent: Parent { tts: mp.tts, idx: mp.idx, kind: mp.kind },
514+
});
515+
mp.tts = &delimited.all_tts;
510516
mp.idx = 0;
517+
mp.kind = kind;
511518
self.cur_mps.push(mp);
512519
}
513520

@@ -528,9 +535,14 @@ impl<'tt> TtParser<'tt> {
528535
if let TokenKind::CloseDelim(_) = token.kind {
529536
// Ascend out of the delimited submatcher.
530537
debug_assert_eq!(idx, len - 1);
531-
let frame = mp.stack.pop().unwrap();
532-
mp.tts = frame.tts;
533-
mp.idx = frame.idx;
538+
match mp.kind {
539+
MatcherKind::Delimited(submatcher) => {
540+
mp.tts = submatcher.parent.tts;
541+
mp.idx = submatcher.parent.idx;
542+
mp.kind = submatcher.parent.kind;
543+
}
544+
_ => unreachable!(),
545+
}
534546
}
535547
mp.idx += 1;
536548
self.next_mps.push(mp);
@@ -540,45 +552,44 @@ impl<'tt> TtParser<'tt> {
540552
// These cannot appear in a matcher.
541553
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
542554
}
543-
} else if let Some(sequence) = &mp.sequence {
555+
} else if let MatcherKind::Sequence(box SequenceSubmatcher { parent, seq }) = &mp.kind {
544556
// We are past the end of a sequence.
545557
// - If it has no separator, we must be only one past the end.
546558
// - If it has a separator, we may be one past the end, in which case we must
547559
// look for a separator. Or we may be two past the end, in which case we have
548560
// already dealt with the separator.
549-
debug_assert!(idx == len || idx == len + 1 && sequence.seq.separator.is_some());
561+
debug_assert!(idx == len || idx == len + 1 && seq.separator.is_some());
550562

551563
if idx == len {
552564
// Sequence matching may have finished: move the "dot" past the sequence in
553565
// `parent`. This applies whether a separator is used or not. If sequence
554566
// matching hasn't finished, this `new_mp` will fail quietly when it is
555567
// processed next time around the loop.
556-
let mut new_mp = sequence.parent.clone();
557-
new_mp.matches = mp.matches.clone();
558-
new_mp.match_cur = mp.match_cur;
559-
new_mp.idx += 1;
568+
let new_mp = box MatcherPos {
569+
tts: parent.tts,
570+
idx: parent.idx + 1,
571+
matches: mp.matches.clone(), // a cheap clone
572+
seq_depth: mp.seq_depth - 1,
573+
match_cur: mp.match_cur,
574+
kind: parent.kind.clone(), // an expensive clone
575+
};
560576
self.cur_mps.push(new_mp);
561577
}
562578

563-
if sequence.seq.separator.is_some() && idx == len {
579+
if seq.separator.is_some() && idx == len {
564580
// Look for the separator.
565-
if sequence
566-
.seq
567-
.separator
568-
.as_ref()
569-
.map_or(false, |sep| token_name_eq(token, sep))
570-
{
581+
if seq.separator.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
571582
// The matcher has a separator, and it matches the current token. We can
572583
// advance past the separator token.
573584
mp.idx += 1;
574585
self.next_mps.push(mp);
575586
}
576-
} else if sequence.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
587+
} else if seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
577588
// We don't need to look for a separator: either this sequence doesn't have
578589
// one, or it does and we've already handled it. Also, we are allowed to have
579590
// more than one repetition. Move the "dot" back to the beginning of the
580591
// matcher and try to match again.
581-
mp.match_cur -= sequence.seq.num_captures;
592+
mp.match_cur -= seq.num_captures;
582593
mp.idx = 0;
583594
self.cur_mps.push(mp);
584595
}

0 commit comments

Comments
 (0)