Skip to content

Commit 95f6870

Browse files
committed
Auto merge of #95509 - nnethercote:simplify-MatcherPos-some-more, r=petrochenkov
Simplify `MatcherPos` some more. A few more improvements. r? `@petrochenkov`
2 parents 79f178b + c6fedd4 commit 95f6870

File tree

2 files changed

+120
-95
lines changed

2 files changed

+120
-95
lines changed

compiler/rustc_expand/src/lib.rs

+1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#![feature(associated_type_bounds)]
22
#![feature(associated_type_defaults)]
3+
#![feature(box_patterns)]
34
#![feature(box_syntax)]
45
#![feature(crate_visibility_modifier)]
56
#![feature(decl_macro)]

compiler/rustc_expand/src/mbe/macro_parser.rs

+119-95
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ crate use ParseResult::*;
7575

7676
use crate::mbe::{self, SequenceRepetition, TokenTree};
7777

78-
use rustc_ast::token::{self, DocComment, Nonterminal, Token};
78+
use rustc_ast::token::{self, DocComment, Nonterminal, Token, TokenKind};
7979
use rustc_parse::parser::{NtOrTt, Parser};
8080
use rustc_session::parse::ParseSess;
8181
use rustc_span::symbol::MacroRulesNormalizedIdent;
@@ -87,17 +87,6 @@ use rustc_data_structures::sync::Lrc;
8787
use rustc_span::symbol::Ident;
8888
use std::borrow::Cow;
8989
use std::collections::hash_map::Entry::{Occupied, Vacant};
90-
use std::mem;
91-
92-
/// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
93-
/// descended into.
94-
#[derive(Clone)]
95-
struct MatcherPosFrame<'tt> {
96-
/// The "parent" matcher that we have descended from.
97-
tts: &'tt [TokenTree],
98-
/// The position of the "dot" in `tt` at the time we descended.
99-
idx: usize,
100-
}
10190

10291
// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
10392
// vectors longer than one frequently have many elements, not just two or
@@ -108,6 +97,33 @@ type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
10897
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
10998
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
11099

100+
#[derive(Clone)]
101+
enum MatcherKind<'tt> {
102+
TopLevel,
103+
Delimited(Box<DelimitedSubmatcher<'tt>>),
104+
Sequence(Box<SequenceSubmatcher<'tt>>),
105+
}
106+
107+
#[derive(Clone)]
108+
struct DelimitedSubmatcher<'tt> {
109+
parent: Parent<'tt>,
110+
}
111+
112+
#[derive(Clone)]
113+
struct SequenceSubmatcher<'tt> {
114+
parent: Parent<'tt>,
115+
seq: &'tt SequenceRepetition,
116+
}
117+
118+
/// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
119+
/// `MatcherPos`.
120+
#[derive(Clone)]
121+
struct Parent<'tt> {
122+
tts: &'tt [TokenTree],
123+
idx: usize,
124+
kind: MatcherKind<'tt>,
125+
}
126+
111127
/// A single matcher position, which could be within the top-level matcher, a submatcher, a
112128
/// subsubmatcher, etc. For example:
113129
/// ```text
@@ -116,13 +132,14 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
116132
/// <--------------> first submatcher; three tts, zero metavars
117133
/// <--------------------------> top-level matcher; two tts, one metavar
118134
/// ```
119-
#[derive(Clone)]
120135
struct MatcherPos<'tt> {
121136
/// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
122137
/// submatcher, this is just the contents of that submatcher.
123138
tts: &'tt [TokenTree],
124139

125-
/// The "dot" position within the current submatcher, i.e. the index into `tts`.
140+
/// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or
141+
/// two positions past the final elements in `tts` when dealing with sequences, see
142+
/// `parse_tt_inner` for details.
126143
idx: usize,
127144

128145
/// This vector ends up with one element per metavar in the *top-level* matcher, even when this
@@ -134,25 +151,18 @@ struct MatcherPos<'tt> {
134151
/// The number of sequences this mp is within.
135152
seq_depth: usize,
136153

137-
/// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are
138-
/// no metavars.
139-
match_lo: usize,
140-
141154
/// The position in `matches` of the next metavar to be matched against the source token
142155
/// stream. Should not be used if there are no metavars.
143156
match_cur: usize,
144157

145-
/// This field is only used if we are matching a sequence.
146-
sequence: Option<MatcherPosSequence<'tt>>,
147-
148-
/// When we are within a `Delimited` submatcher (or subsubmatcher), this tracks the parent
149-
/// matcher(s). The bottom of the stack is the top-level matcher.
150-
stack: SmallVec<[MatcherPosFrame<'tt>; 1]>,
158+
/// What kind of matcher we are in. For submatchers, this contains enough information to
159+
/// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
160+
kind: MatcherKind<'tt>,
151161
}
152162

153163
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
154164
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
155-
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 104);
165+
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 64);
156166

157167
impl<'tt> MatcherPos<'tt> {
158168
fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self {
@@ -161,31 +171,50 @@ impl<'tt> MatcherPos<'tt> {
161171
idx: 0,
162172
matches: empty_matches,
163173
seq_depth: 0,
164-
match_lo: 0,
165174
match_cur: 0,
166-
stack: smallvec![],
167-
sequence: None,
175+
kind: MatcherKind::TopLevel,
168176
}
169177
}
170178

179+
fn empty_sequence(
180+
parent_mp: &MatcherPos<'tt>,
181+
seq: &'tt SequenceRepetition,
182+
empty_matches: Lrc<NamedMatchVec>,
183+
) -> Self {
184+
let mut mp = MatcherPos {
185+
tts: parent_mp.tts,
186+
idx: parent_mp.idx + 1,
187+
matches: parent_mp.matches.clone(), // a cheap clone
188+
seq_depth: parent_mp.seq_depth,
189+
match_cur: parent_mp.match_cur + seq.num_captures,
190+
kind: parent_mp.kind.clone(), // an expensive clone
191+
};
192+
for idx in parent_mp.match_cur..parent_mp.match_cur + seq.num_captures {
193+
mp.push_match(idx, MatchedSeq(empty_matches.clone()));
194+
}
195+
mp
196+
}
197+
171198
fn sequence(
172-
parent: Box<MatcherPos<'tt>>,
199+
parent_mp: Box<MatcherPos<'tt>>,
173200
seq: &'tt SequenceRepetition,
174201
empty_matches: Lrc<NamedMatchVec>,
175202
) -> Self {
203+
let seq_kind = box SequenceSubmatcher {
204+
parent: Parent { tts: parent_mp.tts, idx: parent_mp.idx, kind: parent_mp.kind },
205+
seq,
206+
};
176207
let mut mp = MatcherPos {
177208
tts: &seq.tts,
178209
idx: 0,
179-
matches: parent.matches.clone(),
180-
seq_depth: parent.seq_depth,
181-
match_lo: parent.match_cur,
182-
match_cur: parent.match_cur,
183-
sequence: Some(MatcherPosSequence { parent, seq }),
184-
stack: smallvec![],
210+
matches: parent_mp.matches,
211+
seq_depth: parent_mp.seq_depth,
212+
match_cur: parent_mp.match_cur,
213+
kind: MatcherKind::Sequence(seq_kind),
185214
};
186215
// Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
187216
// must have the parent's depth at this point for these `push_match` calls to work.
188-
for idx in mp.match_lo..mp.match_lo + seq.num_captures {
217+
for idx in mp.match_cur..mp.match_cur + seq.num_captures {
189218
mp.push_match(idx, MatchedSeq(empty_matches.clone()));
190219
}
191220
mp.seq_depth += 1;
@@ -226,16 +255,6 @@ impl<'tt> MatcherPos<'tt> {
226255
}
227256
}
228257

229-
#[derive(Clone)]
230-
struct MatcherPosSequence<'tt> {
231-
/// The parent matcher position. Effectively gives a linked list of matches all the way to the
232-
/// top-level matcher.
233-
parent: Box<MatcherPos<'tt>>,
234-
235-
/// The sequence itself.
236-
seq: &'tt SequenceRepetition,
237-
}
238-
239258
enum EofMatcherPositions<'tt> {
240259
None,
241260
One(Box<MatcherPos<'tt>>),
@@ -448,18 +467,6 @@ impl<'tt> TtParser<'tt> {
448467
let mut eof_mps = EofMatcherPositions::None;
449468

450469
while let Some(mut mp) = self.cur_mps.pop() {
451-
// Backtrack out of delimited submatcher when necessary. When backtracking out again,
452-
// we need to advance the "dot" past the delimiters in the parent matcher(s).
453-
while mp.idx >= mp.tts.len() {
454-
match mp.stack.pop() {
455-
Some(MatcherPosFrame { tts, idx }) => {
456-
mp.tts = tts;
457-
mp.idx = idx + 1;
458-
}
459-
None => break,
460-
}
461-
}
462-
463470
// Get the current position of the "dot" (`idx`) in `mp` and the number of token
464471
// trees in the matcher (`len`).
465472
let idx = mp.idx;
@@ -473,13 +480,11 @@ impl<'tt> TtParser<'tt> {
473480
let op = seq.kleene.op;
474481
if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
475482
// Allow for the possibility of zero matches of this sequence.
476-
let mut new_mp = mp.clone();
477-
new_mp.match_cur += seq.num_captures;
478-
new_mp.idx += 1;
479-
for idx in mp.match_cur..mp.match_cur + seq.num_captures {
480-
new_mp.push_match(idx, MatchedSeq(self.empty_matches.clone()));
481-
}
482-
self.cur_mps.push(new_mp);
483+
self.cur_mps.push(box MatcherPos::empty_sequence(
484+
&*mp,
485+
&seq,
486+
self.empty_matches.clone(),
487+
));
483488
}
484489

485490
// Allow for the possibility of one or more matches of this sequence.
@@ -509,16 +514,17 @@ impl<'tt> TtParser<'tt> {
509514
}
510515

511516
TokenTree::Delimited(_, delimited) => {
512-
// To descend into a delimited submatcher, we push the current matcher onto
513-
// a stack and push a new mp containing the submatcher onto `cur_mps`.
514-
//
515-
// At the beginning of the loop, if we reach the end of the delimited
516-
// submatcher, we pop the stack to backtrack out of the descent. Note that
517-
// we use `all_tts` to include the open and close delimiter tokens.
518-
let tts = mem::replace(&mut mp.tts, &delimited.all_tts);
519-
let idx = mp.idx;
520-
mp.stack.push(MatcherPosFrame { tts, idx });
517+
// To descend into a delimited submatcher, we update `mp` appropriately,
518+
// including enough information to re-ascend afterwards, and push it onto
519+
// `cur_mps`. Later, when we reach the closing delimiter, we will recover
520+
// the parent matcher position to ascend. Note that we use `all_tts` to
521+
// include the open and close delimiter tokens.
522+
let kind = MatcherKind::Delimited(box DelimitedSubmatcher {
523+
parent: Parent { tts: mp.tts, idx: mp.idx, kind: mp.kind },
524+
});
525+
mp.tts = &delimited.all_tts;
521526
mp.idx = 0;
527+
mp.kind = kind;
522528
self.cur_mps.push(mp);
523529
}
524530

@@ -536,6 +542,18 @@ impl<'tt> TtParser<'tt> {
536542
mp.idx += 1;
537543
self.cur_mps.push(mp);
538544
} else if token_name_eq(&t, token) {
545+
if let TokenKind::CloseDelim(_) = token.kind {
546+
// Ascend out of the delimited submatcher.
547+
debug_assert_eq!(idx, len - 1);
548+
match mp.kind {
549+
MatcherKind::Delimited(submatcher) => {
550+
mp.tts = submatcher.parent.tts;
551+
mp.idx = submatcher.parent.idx;
552+
mp.kind = submatcher.parent.kind;
553+
}
554+
_ => unreachable!(),
555+
}
556+
}
539557
mp.idx += 1;
540558
self.next_mps.push(mp);
541559
}
@@ -544,38 +562,44 @@ impl<'tt> TtParser<'tt> {
544562
// These cannot appear in a matcher.
545563
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
546564
}
547-
} else if let Some(sequence) = &mp.sequence {
565+
} else if let MatcherKind::Sequence(box SequenceSubmatcher { parent, seq }) = &mp.kind {
548566
// We are past the end of a sequence.
549-
debug_assert!(idx <= len + 1);
567+
// - If it has no separator, we must be only one past the end.
568+
// - If it has a separator, we may be one past the end, in which case we must
569+
// look for a separator. Or we may be two past the end, in which case we have
570+
// already dealt with the separator.
571+
debug_assert!(idx == len || idx == len + 1 && seq.separator.is_some());
550572

551573
if idx == len {
552-
// Add all matches from the sequence to `parent`, and move the "dot" past the
553-
// sequence in `parent`. This allows for the case where the sequence matching
554-
// is finished.
555-
let mut new_mp = sequence.parent.clone();
556-
new_mp.matches = mp.matches.clone();
557-
new_mp.match_cur = mp.match_lo + sequence.seq.num_captures;
558-
new_mp.idx += 1;
574+
// Sequence matching may have finished: move the "dot" past the sequence in
575+
// `parent`. This applies whether a separator is used or not. If sequence
576+
// matching hasn't finished, this `new_mp` will fail quietly when it is
577+
// processed next time around the loop.
578+
let new_mp = box MatcherPos {
579+
tts: parent.tts,
580+
idx: parent.idx + 1,
581+
matches: mp.matches.clone(), // a cheap clone
582+
seq_depth: mp.seq_depth - 1,
583+
match_cur: mp.match_cur,
584+
kind: parent.kind.clone(), // an expensive clone
585+
};
559586
self.cur_mps.push(new_mp);
560587
}
561588

562-
if idx == len && sequence.seq.separator.is_some() {
563-
if sequence
564-
.seq
565-
.separator
566-
.as_ref()
567-
.map_or(false, |sep| token_name_eq(token, sep))
568-
{
589+
if seq.separator.is_some() && idx == len {
590+
// Look for the separator.
591+
if seq.separator.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
569592
// The matcher has a separator, and it matches the current token. We can
570593
// advance past the separator token.
571594
mp.idx += 1;
572595
self.next_mps.push(mp);
573596
}
574-
} else if sequence.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
575-
// We don't need a separator. Move the "dot" back to the beginning of the
576-
// matcher and try to match again UNLESS we are only allowed to have _one_
577-
// repetition.
578-
mp.match_cur = mp.match_lo;
597+
} else if seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
598+
// We don't need to look for a separator: either this sequence doesn't have
599+
// one, or it does and we've already handled it. Also, we are allowed to have
600+
// more than one repetition. Move the "dot" back to the beginning of the
601+
// matcher and try to match again.
602+
mp.match_cur -= seq.num_captures;
579603
mp.idx = 0;
580604
self.cur_mps.push(mp);
581605
}

0 commit comments

Comments
 (0)