
Commit 3c79f0c

Rollup merge of #125316 - nnethercote:tweak-Spacing, r=petrochenkov
Tweak `Spacing` use

Some clean-up precursors to #125174. r? ``@petrochenkov``
2 parents f131ee6 + 4d513cb commit 3c79f0c

7 files changed (+75 −31 lines changed)


compiler/rustc_ast/src/tokenstream.rs (+2 −2)

@@ -661,11 +661,11 @@ impl TokenStream {
         if attr_style == AttrStyle::Inner {
             vec![
                 TokenTree::token_joint(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
+                TokenTree::token_joint_hidden(token::Not, span),
                 body,
             ]
         } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
+            vec![TokenTree::token_joint_hidden(token::Pound, span), body]
         }
     }
 }

compiler/rustc_ast_pretty/src/pprust/state.rs (+29 −11)

@@ -681,22 +681,40 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::DerefMut
         }
     }
 
+    // The easiest way to implement token stream pretty printing would be to
+    // print each token followed by a single space. But that would produce ugly
+    // output, so we go to some effort to do better.
+    //
+    // First, we track whether each token that appears in source code is
+    // followed by a space, with `Spacing`, and reproduce that in the output.
+    // This works well in a lot of cases. E.g. `stringify!(x + y)` produces
+    // "x + y" and `stringify!(x+y)` produces "x+y".
+    //
+    // But this doesn't work for code produced by proc macros (which have no
+    // original source text representation) nor for code produced by decl
+    // macros (which are tricky because the whitespace after tokens appearing
+    // in macro rules isn't always what you want in the produced output). For
+    // these we mostly use `Spacing::Alone`, which is the conservative choice.
+    //
+    // So we have a backup mechanism for when `Spacing::Alone` occurs between a
+    // pair of tokens: we check if that pair of tokens can obviously go
+    // together without a space between them. E.g. token `x` followed by token
+    // `,` is better printed as `x,` than `x ,`. (Even if the original source
+    // code was `x ,`.)
+    //
+    // Finally, we must be careful about changing the output. Token pretty
+    // printing is used by `stringify!` and `impl Display for
+    // proc_macro::TokenStream`, and some programs rely on the output having a
+    // particular form, even though they shouldn't. In particular, some proc
+    // macros do `format!({stream})` on a token stream and then "parse" the
+    // output with simple string matching that can't handle whitespace changes.
+    // E.g. we have seen cases where a proc macro can handle `a :: b` but not
+    // `a::b`. See #117433 for some examples.
     fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) {
         let mut iter = tts.trees().peekable();
         while let Some(tt) = iter.next() {
             let spacing = self.print_tt(tt, convert_dollar_crate);
             if let Some(next) = iter.peek() {
-                // Should we print a space after `tt`? There are two guiding
-                // factors.
-                // - `spacing` is the more important and accurate one. Most
-                //   tokens have good spacing information, and
-                //   `Joint`/`JointHidden` get used a lot.
-                // - `space_between` is the backup. Code produced by proc
-                //   macros has worse spacing information, with no
-                //   `JointHidden` usage and too much `Alone` usage, which
-                //   would result in over-spaced output such as
-                //   `( x () , y . z )`. `space_between` avoids some of the
-                //   excess whitespace.
                 if spacing == Spacing::Alone && space_between(tt, next) {
                     self.space();
                 }
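
The behaviour described in the new comment is observable from user code through `stringify!`. Below is a small illustrative sketch (not part of this commit); the expected strings are the ones claimed in the comment above rather than independently verified.

```rust
// Illustrative sketch only: the spacing rules described in the new
// `print_tts` comment, as seen through `stringify!`.
fn main() {
    // Source spacing is tracked via `Spacing` and reproduced in the output.
    assert_eq!(stringify!(x + y), "x + y");
    assert_eq!(stringify!(x+y), "x+y");

    // The `space_between` backup: `x` followed by `,` prints as `x,`,
    // even though the source here has a space before the comma.
    assert_eq!(stringify!(x ,), "x,");
}
```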

compiler/rustc_builtin_macros/src/assert/context.rs (+2 −2)

@@ -153,7 +153,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
     fn build_panic(&self, expr_str: &str, panic_path: Path) -> P<Expr> {
         let escaped_expr_str = escape_to_fmt(expr_str);
         let initial = [
-            TokenTree::token_joint_hidden(
+            TokenTree::token_joint(
                 token::Literal(token::Lit {
                     kind: token::LitKind::Str,
                     symbol: Symbol::intern(&if self.fmt_string.is_empty() {
@@ -172,7 +172,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
         ];
         let captures = self.capture_decls.iter().flat_map(|cap| {
             [
-                TokenTree::token_joint_hidden(
+                TokenTree::token_joint(
                     token::Ident(cap.ident.name, IdentIsRaw::No),
                     cap.ident.span,
                 ),

compiler/rustc_expand/src/mbe.rs (+4 −1)

@@ -68,12 +68,15 @@ pub(crate) enum KleeneOp {
 /// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros.
 #[derive(Debug, PartialEq, Encodable, Decodable)]
 enum TokenTree {
+    /// A token. Unlike `tokenstream::TokenTree::Token` this lacks a `Spacing`.
+    /// See the comments about `Spacing` in the `transcribe` function.
     Token(Token),
     /// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS).
     Delimited(DelimSpan, DelimSpacing, Delimited),
     /// A kleene-style repetition sequence, e.g. `$($e:expr)*` (RHS) or `$($e),*` (LHS).
     Sequence(DelimSpan, SequenceRepetition),
-    /// e.g., `$var`.
+    /// e.g., `$var`. The span covers the leading dollar and the ident. (The span within the ident
+    /// only covers the ident, e.g. `var`.)
     MetaVar(Span, Ident),
     /// e.g., `$var:expr`. Only appears on the LHS.
     MetaVarDecl(Span, Ident /* name to bind */, Option<NonterminalKind>),

compiler/rustc_expand/src/mbe/quoted.rs (+19 −11)

@@ -62,7 +62,10 @@ pub(super) fn parse(
         match tree {
             TokenTree::MetaVar(start_sp, ident) if parsing_patterns => {
                 let span = match trees.next() {
-                    Some(&tokenstream::TokenTree::Token(Token { kind: token::Colon, span }, _)) => {
+                    Some(&tokenstream::TokenTree::Token(
+                        Token { kind: token::Colon, span: colon_span },
+                        _,
+                    )) => {
                         match trees.next() {
                             Some(tokenstream::TokenTree::Token(token, _)) => match token.ident() {
                                 Some((fragment, _)) => {
@@ -126,10 +129,12 @@ pub(super) fn parse(
                                 }
                                 _ => token.span,
                             },
-                            tree => tree.map_or(span, tokenstream::TokenTree::span),
+                            Some(tree) => tree.span(),
+                            None => colon_span,
                         }
                     }
-                    tree => tree.map_or(start_sp, tokenstream::TokenTree::span),
+                    Some(tree) => tree.span(),
+                    None => start_sp,
                 };
 
                 result.push(TokenTree::MetaVarDecl(span, ident, None));
@@ -176,7 +181,7 @@ fn parse_tree<'a>(
     // Depending on what `tree` is, we could be parsing different parts of a macro
     match tree {
         // `tree` is a `$` token. Look at the next token in `trees`
-        &tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _) => {
+        &tokenstream::TokenTree::Token(Token { kind: token::Dollar, span: dollar_span }, _) => {
            // FIXME: Handle `Invisible`-delimited groups in a more systematic way
            // during parsing.
            let mut next = outer_trees.next();
@@ -209,7 +214,7 @@ fn parse_tree<'a>(
                             err.emit();
                             // Returns early the same read `$` to avoid spanning
                             // unrelated diagnostics that could be performed afterwards
-                            return TokenTree::token(token::Dollar, span);
+                            return TokenTree::token(token::Dollar, dollar_span);
                         }
                         Ok(elem) => {
                             maybe_emit_macro_metavar_expr_feature(
@@ -251,7 +256,7 @@ fn parse_tree<'a>(
                // special metavariable that names the crate of the invocation.
                Some(tokenstream::TokenTree::Token(token, _)) if token.is_ident() => {
                    let (ident, is_raw) = token.ident().unwrap();
-                   let span = ident.span.with_lo(span.lo());
+                   let span = ident.span.with_lo(dollar_span.lo());
                    if ident.name == kw::Crate && matches!(is_raw, IdentIsRaw::No) {
                        TokenTree::token(token::Ident(kw::DollarCrate, is_raw), span)
                    } else {
@@ -260,16 +265,19 @@ fn parse_tree<'a>(
                }
 
                // `tree` is followed by another `$`. This is an escaped `$`.
-               Some(&tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }, _)) => {
+               Some(&tokenstream::TokenTree::Token(
+                   Token { kind: token::Dollar, span: dollar_span2 },
+                   _,
+               )) => {
                    if parsing_patterns {
                        span_dollar_dollar_or_metavar_in_the_lhs_err(
                            sess,
-                           &Token { kind: token::Dollar, span },
+                           &Token { kind: token::Dollar, span: dollar_span2 },
                        );
                    } else {
-                       maybe_emit_macro_metavar_expr_feature(features, sess, span);
+                       maybe_emit_macro_metavar_expr_feature(features, sess, dollar_span2);
                    }
-                   TokenTree::token(token::Dollar, span)
+                   TokenTree::token(token::Dollar, dollar_span2)
                }
 
                // `tree` is followed by some other token. This is an error.
@@ -281,7 +289,7 @@ fn parse_tree<'a>(
                }
 
                // There are no more tokens. Just return the `$` we already have.
-               None => TokenTree::token(token::Dollar, span),
+               None => TokenTree::token(token::Dollar, dollar_span),
            }
        }

compiler/rustc_expand/src/mbe/transcribe.rs (+15)

@@ -253,8 +253,23 @@ pub(super) fn transcribe<'a>(
            mbe::TokenTree::MetaVar(mut sp, mut original_ident) => {
                // Find the matched nonterminal from the macro invocation, and use it to replace
                // the meta-var.
+               //
+               // We use `Spacing::Alone` everywhere here, because that's the conservative choice
+               // and spacing of declarative macros is tricky. E.g. in this macro:
+               // ```
+               // macro_rules! idents {
+               //     ($($a:ident,)*) => { stringify!($($a)*) }
+               // }
+               // ```
+               // `$a` has no whitespace after it and will be marked `JointHidden`. If you then
+               // call `idents!(x,y,z,)`, each of `x`, `y`, and `z` will be marked as `Joint`. So
+               // if you choose to use `$x`'s spacing or the identifier's spacing, you'll end up
+               // producing "xyz", which is bad because it effectively merges tokens.
+               // `Spacing::Alone` is the safer option. Fortunately, `space_between` will avoid
+               // some of the unnecessary whitespace.
                let ident = MacroRulesNormalizedIdent::new(original_ident);
                if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
+                   // njn: explain the use of alone here
                    let tt = match cur_matched {
                        MatchedSingle(ParseNtResult::Tt(tt)) => {
                            // `tt`s are emitted into the output stream directly as "raw tokens",
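
The `idents!` scenario from the new comment can be reproduced from user code. A rough sketch follows (not part of this commit); the key point, per the comment, is that transcribing with `Spacing::Alone` keeps the identifiers separate instead of merging them into `xyz`.

```rust
// Illustrative sketch only: the declarative-macro case discussed in the new
// comment in `transcribe`. `$a` is transcribed with `Spacing::Alone`, so the
// identifiers stay separate tokens; `space_between` then trims whitespace
// only where dropping it cannot merge tokens.
macro_rules! idents {
    ($($a:ident,)*) => { stringify!($($a)*) };
}

fn main() {
    let s = idents!(x, y, z,);
    // Must not collapse into a single `xyz`; typically prints "x y z".
    assert_ne!(s, "xyz");
    println!("{s}");
}
```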

compiler/rustc_expand/src/proc_macro_server.rs (+4 −4)

@@ -309,10 +309,10 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
         use rustc_ast::token::*;
 
         // The code below is conservative, using `token_alone`/`Spacing::Alone`
-        // in most places. When the resulting code is pretty-printed by
-        // `print_tts` it ends up with spaces between most tokens, which is
-        // safe but ugly. It's hard in general to do better when working at the
-        // token level.
+        // in most places. It's hard in general to do better when working at
+        // the token level. When the resulting code is pretty-printed by
+        // `print_tts` the `space_between` function helps avoid a lot of
+        // unnecessary whitespace, so the results aren't too bad.
         let (tree, rustc) = self;
         match tree {
             TokenTree::Punct(Punct { ch, joint, span }) => {
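
From the proc-macro side, the situation the updated comment describes looks roughly like the sketch below. This is a hypothetical user-side proc macro, not code from this commit, and the expected rendering is only what the comments here and in `print_tts` suggest: every token is built with `Spacing::Alone`, and `space_between` keeps the `Display` output reasonable.

```rust
// Illustrative sketch only, assuming a separate crate with `proc-macro = true`
// (this is user-side code, not compiler code). All tokens use `Spacing::Alone`,
// the conservative choice; the pretty printer's `space_between` backup is what
// keeps the rendered stream from being over-spaced.
use proc_macro::{Ident, Punct, Spacing, Span, TokenStream, TokenTree};

#[proc_macro]
pub fn spacing_demo(_input: TokenStream) -> TokenStream {
    let tokens: TokenStream = [
        TokenTree::Ident(Ident::new("x", Span::call_site())),
        TokenTree::Punct(Punct::new(',', Spacing::Alone)),
        TokenTree::Ident(Ident::new("y", Span::call_site())),
    ]
    .into_iter()
    .collect();
    // Expected to render as "x, y" rather than "x , y", per the `print_tts`
    // comment, even though no `Joint` spacing was supplied.
    eprintln!("{tokens}");
    // Expand to nothing; the built stream above is only for demonstration.
    TokenStream::new()
}
```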
