Skip to content

Commit a6942db

Browse files
committed
Improve print_tts by changing tokenstream::Spacing.
`proc_macro::Spacing` only appears on `Punct` tokens, where: - `Joint` means "the next token follows immediately and is a `Punct`". - `Alone` means "the next token doesn't follow immediately *or* it follows immediately and is not a `Punct`". `tokenstream::Spacing` appears on all `TokenTree::Token` instances, both punct and non-punct, where: - `Joint` means "the next token follows immediately and is a punct" (i.e. satisfies `is_op`). - `Alone` means "the next token doesn't follow immediately *or* it follows immediately and is not a punct". The fact that `Alone` is used for two different cases is awkward. This commit replaces `tokenstream::Spacing` with a new type `FollowedBy` that separates those two cases: - `Space` means "the next token doesn't follow immediately". - `Punct` means "the next token follows immediately and is a punct". - `Other` means "the next token follows immediately and is not a punct". The mapping from old to new is: - `Joint` -> `Punct` - `Alone` -> `Space` or `Other`, depending on the situation. We can use `FollowedBy` to *drastically* improve the output of `print_tts`. For example, this: ``` stringify!(let a: Vec<u32> = vec![];) ``` currently produces this string: ``` let a : Vec < u32 > = vec! [] ; ``` With this PR, it now produces this string: ``` let a: Vec<u32> = vec![] ; ``` (The space after the `]` is because `TokenTree::Delimited` currently doesn't have `FollowedBy` information. Adding this as a follow-up should be straightforward.) The new `print_tts` doesn't replicate original code perfectly. E.g. multiple space characters will be condensed into a single space character. But it's much improved. `print_tts` still produces the old, uglier output for code produced by proc macros, because we have to translate the generated code from `Spacing` to the more expressive `FollowedBy`, which results in too much `FollowedBy::Space` usage and no `FollowedBy::Other` usage.
So `tt_prepend_space` still exists and is used by `print_tts` in conjunction with the `FollowedBy` field. This change will also help with the removal of `Token::Interpolated`. Currently interpolated tokens are pretty-printed nicely via AST pretty printing. `Token::Interpolated` removal will mean they get printed with `print_tts`. Without this change, that would result in much uglier output for code produced by decl macro expansions. With this change, AST pretty printing and `print_tts` produce similar results.
1 parent 00e93e1 commit a6942db

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

59 files changed

+490
-246
lines changed

compiler/rustc_ast/src/attr/mod.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::ast::{MetaItem, MetaItemKind, NestedMetaItem, NormalAttr};
66
use crate::ast::{Path, PathSegment, DUMMY_NODE_ID};
77
use crate::ptr::P;
88
use crate::token::{self, CommentKind, Delimiter, Token};
9-
use crate::tokenstream::{DelimSpan, Spacing, TokenTree};
9+
use crate::tokenstream::{DelimSpan, FollowedBy, TokenTree};
1010
use crate::tokenstream::{LazyAttrTokenStream, TokenStream};
1111
use crate::util::comments;
1212
use crate::util::literal::escape_string_symbol;
@@ -183,7 +183,7 @@ impl Attribute {
183183
.to_tokenstream(),
184184
&AttrKind::DocComment(comment_kind, data) => TokenStream::new(vec![TokenTree::Token(
185185
Token::new(token::DocComment(comment_kind, self.style, data), self.span),
186-
Spacing::Alone,
186+
FollowedBy::Space,
187187
)]),
188188
}
189189
}
@@ -570,7 +570,7 @@ pub fn mk_attr_nested_word(
570570
) -> Attribute {
571571
let inner_tokens = TokenStream::new(vec![TokenTree::Token(
572572
Token::from_ast_ident(Ident::new(inner, span)),
573-
Spacing::Alone,
573+
FollowedBy::Space,
574574
)]);
575575
let outer_ident = Ident::new(outer, span);
576576
let path = Path::from_ident(outer_ident);

compiler/rustc_ast/src/tokenstream.rs

+84-52
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ use std::{cmp, fmt, iter, mem};
4444
pub enum TokenTree {
4545
/// A single token. Should never be `OpenDelim` or `CloseDelim`, because
4646
/// delimiters are implicitly represented by `Delimited`.
47-
Token(Token, Spacing),
47+
Token(Token, FollowedBy),
4848
/// A delimited sequence of token trees.
4949
Delimited(DelimSpan, Delimiter, TokenStream),
5050
}
@@ -54,7 +54,7 @@ pub enum TokenTree {
5454
fn _dummy()
5555
where
5656
Token: sync::DynSend + sync::DynSync,
57-
Spacing: sync::DynSend + sync::DynSync,
57+
FollowedBy: sync::DynSend + sync::DynSync,
5858
DelimSpan: sync::DynSend + sync::DynSync,
5959
Delimiter: sync::DynSend + sync::DynSync,
6060
TokenStream: sync::DynSend + sync::DynSync,
@@ -89,20 +89,25 @@ impl TokenTree {
8989
}
9090
}
9191

92-
/// Create a `TokenTree::Token` with alone spacing.
93-
pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
94-
TokenTree::Token(Token::new(kind, span), Spacing::Alone)
92+
/// Create a `TokenTree::Token` with `FollowedBy::Space`.
93+
pub fn token_fby_space(kind: TokenKind, span: Span) -> TokenTree {
94+
TokenTree::Token(Token::new(kind, span), FollowedBy::Space)
9595
}
9696

97-
/// Create a `TokenTree::Token` with joint spacing.
98-
pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
99-
TokenTree::Token(Token::new(kind, span), Spacing::Joint)
97+
/// Create a `TokenTree::Token` with `FollowedBy::Punct`.
98+
pub fn token_fby_punct(kind: TokenKind, span: Span) -> TokenTree {
99+
TokenTree::Token(Token::new(kind, span), FollowedBy::Punct)
100+
}
101+
102+
/// Create a `TokenTree::Token` with `FollowedBy::Other`.
103+
pub fn token_fby_other(kind: TokenKind, span: Span) -> TokenTree {
104+
TokenTree::Token(Token::new(kind, span), FollowedBy::Other)
100105
}
101106

102107
pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
103108
match self {
104-
TokenTree::Token(token, spacing) => match token.uninterpolate() {
105-
Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
109+
TokenTree::Token(token, fby) => match token.uninterpolate() {
110+
Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *fby)),
106111
Cow::Borrowed(_) => Cow::Borrowed(self),
107112
},
108113
_ => Cow::Borrowed(self),
@@ -182,7 +187,7 @@ pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
182187
/// Like `TokenTree`, but for `AttrTokenStream`.
183188
#[derive(Clone, Debug, Encodable, Decodable)]
184189
pub enum AttrTokenTree {
185-
Token(Token, Spacing),
190+
Token(Token, FollowedBy),
186191
Delimited(DelimSpan, Delimiter, AttrTokenStream),
187192
/// Stores the attributes for an attribute target,
188193
/// along with the tokens for that attribute target.
@@ -205,8 +210,8 @@ impl AttrTokenStream {
205210
.0
206211
.iter()
207212
.flat_map(|tree| match &tree {
208-
AttrTokenTree::Token(inner, spacing) => {
209-
smallvec![TokenTree::Token(inner.clone(), *spacing)].into_iter()
213+
AttrTokenTree::Token(inner, fby) => {
214+
smallvec![TokenTree::Token(inner.clone(), *fby)].into_iter()
210215
}
211216
AttrTokenTree::Delimited(span, delim, stream) => {
212217
smallvec![TokenTree::Delimited(*span, *delim, stream.to_tokenstream()),]
@@ -307,21 +312,40 @@ pub struct AttributesData {
307312
#[derive(Clone, Debug, Default, Encodable, Decodable)]
308313
pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);
309314

310-
/// Similar to `proc_macro::Spacing`, but for tokens.
311-
///
312-
/// Note that all `ast::TokenTree::Token` instances have a `Spacing`, but when
313-
/// we convert to `proc_macro::TokenTree` for proc macros only `Punct`
314-
/// `TokenTree`s have a `proc_macro::Spacing`.
315+
/// Describes what immediately follows a token. Used for pretty-printing and
316+
/// conversions to `proc_macro::Spacing`.
315317
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
316-
pub enum Spacing {
317-
/// The token is not immediately followed by an operator token (as
318-
/// determined by `Token::is_op`). E.g. a `+` token is `Alone` in `+ =`,
319-
/// `+/*foo*/=`, `+ident`, and `+()`.
320-
Alone,
321-
322-
/// The token is immediately followed by an operator token. E.g. a `+`
323-
/// token is `Joint` in `+=` and `++`.
324-
Joint,
318+
pub enum FollowedBy {
319+
/// The token is immediately followed by whitespace or a non-doc comment.
320+
/// When constructing token streams, use this for each token that should be
321+
/// pretty-printed with a space after it.
322+
///
323+
/// Converts to `Spacing::Alone`, and `Spacing::Alone` converts back to
324+
/// this.
325+
Space,
326+
327+
/// The token is immediately followed by punctuation (as determined by
328+
/// `Token::is_punct`). When constructing token streams, use this for each
329+
/// token that (a) should be pretty-printed without a space after it, and
330+
/// (b) is followed by an punctuation token.
331+
///
332+
/// Converts to `Spacing::Joint`, and `Spacing::Joint` converts back to
333+
/// this.
334+
Punct,
335+
336+
/// The token is immediately followed by something else: an identifier,
337+
/// lifetime, literal, delimiter, doc comment, or EOF. When constructing
338+
/// token streams, use this for each token that (a) should be
339+
/// pretty-printed without a space after it, and (b) is followed by a
340+
/// non-punctuation token.
341+
///
342+
/// Converts to `Spacing::Alone`, but `Spacing::Alone` converts back to
343+
/// `FollowedBy::Space`. Because of that, pretty-printing of `TokenStream`s
344+
/// produced by proc macros is unavoidably uglier (with more whitespace
345+
/// between tokens) than pretty-printing of `TokenStream`'s produced by
346+
/// other means (i.e. user-written code, internally constructed token
347+
/// streams, and token streams produced by declarative macros).
348+
Other,
325349
}
326350

327351
impl TokenStream {
@@ -336,7 +360,7 @@ impl TokenStream {
336360
let sp = match (&ts, &next) {
337361
(_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
338362
(
339-
TokenTree::Token(token_left, Spacing::Alone),
363+
TokenTree::Token(token_left, FollowedBy::Space | FollowedBy::Other),
340364
TokenTree::Token(token_right, _),
341365
) if ((token_left.is_ident() && !token_left.is_reserved_ident())
342366
|| token_left.is_lit())
@@ -349,7 +373,7 @@ impl TokenStream {
349373
_ => continue,
350374
};
351375
let sp = sp.shrink_to_hi();
352-
let comma = TokenTree::token_alone(token::Comma, sp);
376+
let comma = TokenTree::token_fby_space(token::Comma, sp);
353377
suggestion = Some((pos, comma, sp));
354378
}
355379
}
@@ -425,14 +449,22 @@ impl TokenStream {
425449
self
426450
}
427451

428-
/// Create a token stream containing a single token with alone spacing.
429-
pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
430-
TokenStream::new(vec![TokenTree::token_alone(kind, span)])
452+
/// Create a token stream containing a single token with
453+
/// `FollowedBy::Space`.
454+
pub fn token_fby_space(kind: TokenKind, span: Span) -> TokenStream {
455+
TokenStream::new(vec![TokenTree::token_fby_space(kind, span)])
456+
}
457+
458+
/// Create a token stream containing a single token with
459+
/// `FollowedBy::Punct`.
460+
pub fn token_fby_punct(kind: TokenKind, span: Span) -> TokenStream {
461+
TokenStream::new(vec![TokenTree::token_fby_punct(kind, span)])
431462
}
432463

433-
/// Create a token stream containing a single token with joint spacing.
434-
pub fn token_joint(kind: TokenKind, span: Span) -> TokenStream {
435-
TokenStream::new(vec![TokenTree::token_joint(kind, span)])
464+
/// Create a token stream containing a single token with
465+
/// `FollowedBy::Other`.
466+
pub fn token_fby_other(kind: TokenKind, span: Span) -> TokenStream {
467+
TokenStream::new(vec![TokenTree::token_fby_other(kind, span)])
436468
}
437469

438470
/// Create a token stream containing a single `Delimited`.
@@ -458,16 +490,16 @@ impl TokenStream {
458490
pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
459491
match nt {
460492
Nonterminal::NtIdent(ident, is_raw) => {
461-
TokenStream::token_alone(token::Ident(ident.name, *is_raw), ident.span)
493+
TokenStream::token_fby_space(token::Ident(ident.name, *is_raw), ident.span)
462494
}
463495
Nonterminal::NtLifetime(ident) => {
464-
TokenStream::token_alone(token::Lifetime(ident.name), ident.span)
496+
TokenStream::token_fby_space(token::Lifetime(ident.name), ident.span)
465497
}
466498
Nonterminal::NtItem(item) => TokenStream::from_ast(item),
467499
Nonterminal::NtBlock(block) => TokenStream::from_ast(block),
468500
Nonterminal::NtStmt(stmt) if let StmtKind::Empty = stmt.kind => {
469501
// FIXME: Properly collect tokens for empty statements.
470-
TokenStream::token_alone(token::Semi, stmt.span)
502+
TokenStream::token_fby_space(token::Semi, stmt.span)
471503
}
472504
Nonterminal::NtStmt(stmt) => TokenStream::from_ast(stmt),
473505
Nonterminal::NtPat(pat) => TokenStream::from_ast(pat),
@@ -479,23 +511,23 @@ impl TokenStream {
479511
}
480512
}
481513

482-
fn flatten_token(token: &Token, spacing: Spacing) -> TokenTree {
514+
fn flatten_token(token: &Token, fby: FollowedBy) -> TokenTree {
483515
match &token.kind {
484516
token::Interpolated(nt) if let token::NtIdent(ident, is_raw) = **nt => {
485-
TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), spacing)
517+
TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), fby)
486518
}
487519
token::Interpolated(nt) => TokenTree::Delimited(
488520
DelimSpan::from_single(token.span),
489521
Delimiter::Invisible,
490522
TokenStream::from_nonterminal_ast(nt).flattened(),
491523
),
492-
_ => TokenTree::Token(token.clone(), spacing),
524+
_ => TokenTree::Token(token.clone(), fby),
493525
}
494526
}
495527

496528
fn flatten_token_tree(tree: &TokenTree) -> TokenTree {
497529
match tree {
498-
TokenTree::Token(token, spacing) => TokenStream::flatten_token(token, *spacing),
530+
TokenTree::Token(token, fby) => TokenStream::flatten_token(token, *fby),
499531
TokenTree::Delimited(span, delim, tts) => {
500532
TokenTree::Delimited(*span, *delim, tts.flattened())
501533
}
@@ -521,13 +553,13 @@ impl TokenStream {
521553
// If `vec` is not empty, try to glue `tt` onto its last token. The return
522554
// value indicates if gluing took place.
523555
fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
524-
if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = vec.last()
525-
&& let TokenTree::Token(tok, spacing) = tt
556+
if let Some(TokenTree::Token(last_tok, FollowedBy::Punct)) = vec.last()
557+
&& let TokenTree::Token(tok, fby) = tt
526558
&& let Some(glued_tok) = last_tok.glue(tok)
527559
{
528560
// ...then overwrite the last token tree in `vec` with the
529561
// glued token, and skip the first token tree from `stream`.
530-
*vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
562+
*vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *fby);
531563
true
532564
} else {
533565
false
@@ -583,7 +615,7 @@ impl TokenStream {
583615
match tt {
584616
&TokenTree::Token(
585617
Token { kind: token::DocComment(_, attr_style, data), span },
586-
_spacing,
618+
_fby,
587619
) => {
588620
let desugared = desugared_tts(attr_style, data, span);
589621
let desugared_len = desugared.len();
@@ -630,9 +662,9 @@ impl TokenStream {
630662
delim_span,
631663
Delimiter::Bracket,
632664
[
633-
TokenTree::token_alone(token::Ident(sym::doc, false), span),
634-
TokenTree::token_alone(token::Eq, span),
635-
TokenTree::token_alone(
665+
TokenTree::token_fby_space(token::Ident(sym::doc, false), span),
666+
TokenTree::token_fby_space(token::Eq, span),
667+
TokenTree::token_fby_space(
636668
TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
637669
span,
638670
),
@@ -643,12 +675,12 @@ impl TokenStream {
643675

644676
if attr_style == AttrStyle::Inner {
645677
vec![
646-
TokenTree::token_alone(token::Pound, span),
647-
TokenTree::token_alone(token::Not, span),
678+
TokenTree::token_fby_space(token::Pound, span),
679+
TokenTree::token_fby_space(token::Not, span),
648680
body,
649681
]
650682
} else {
651-
vec![TokenTree::token_alone(token::Pound, span), body]
683+
vec![TokenTree::token_fby_space(token::Pound, span), body]
652684
}
653685
}
654686
}

compiler/rustc_ast_pretty/src/pprust/state.rs

+22-6
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::pp::{self, Breaks};
77
use rustc_ast::attr::AttrIdGenerator;
88
use rustc_ast::ptr::P;
99
use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind};
10-
use rustc_ast::tokenstream::{TokenStream, TokenTree};
10+
use rustc_ast::tokenstream::{FollowedBy, TokenStream, TokenTree};
1111
use rustc_ast::util::classify;
1212
use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
1313
use rustc_ast::util::parser;
@@ -547,14 +547,15 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
547547
/// appropriate macro, transcribe back into the grammar we just parsed from,
548548
/// and then pretty-print the resulting AST nodes (so, e.g., we print
549549
/// expression arguments as expressions). It can be done! I think.
550-
fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) {
550+
fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) -> FollowedBy {
551551
match tt {
552-
TokenTree::Token(token, _) => {
552+
TokenTree::Token(token, fby) => {
553553
let token_str = self.token_to_string_ext(token, convert_dollar_crate);
554554
self.word(token_str);
555555
if let token::DocComment(..) = token.kind {
556556
self.hardbreak()
557557
}
558+
*fby
558559
}
559560
TokenTree::Delimited(dspan, delim, tts) => {
560561
self.print_mac_common(
@@ -566,16 +567,29 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
566567
convert_dollar_crate,
567568
dspan.entire(),
568569
);
570+
// FIXME: add two `FollowedBy` fields to `TokenTree::Delimited`
571+
// and use the close delim one here.
572+
FollowedBy::Space
569573
}
570574
}
571575
}
572576

573577
fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) {
574578
let mut iter = tts.trees().peekable();
575579
while let Some(tt) = iter.next() {
576-
self.print_tt(tt, convert_dollar_crate);
580+
let fby = self.print_tt(tt, convert_dollar_crate);
577581
if let Some(next) = iter.peek() {
578-
if tt_prepend_space(next, tt) {
582+
// Should we print a space after `tt`? There are two guiding
583+
// factors.
584+
// - `fby` is the more important and accurate one. Most tokens
585+
// have good followed-by information, and `Punct`/`Other` get
586+
// used a lot.
587+
// - `tt_prepend_space` is the backup. Code produced by proc
588+
// macros has worse followed-by information, with no `Other`
589+
// usage and too much `Space` usage, resulting in over-spaced
590+
// output such as `let a : Vec < u32 > = Vec :: new() ;`.
591+
// `tt_prepend_space` avoids some of excess whitespace.
592+
if fby == FollowedBy::Space && tt_prepend_space(next, tt) {
579593
self.space();
580594
}
581595
}
@@ -855,7 +869,9 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
855869
}
856870

857871
fn tt_to_string(&self, tt: &TokenTree) -> String {
858-
Self::to_string(|s| s.print_tt(tt, false))
872+
Self::to_string(|s| {
873+
s.print_tt(tt, false);
874+
})
859875
}
860876

861877
fn tts_to_string(&self, tokens: &TokenStream) -> String {

compiler/rustc_builtin_macros/src/assert/context.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
151151
fn build_panic(&self, expr_str: &str, panic_path: Path) -> P<Expr> {
152152
let escaped_expr_str = escape_to_fmt(expr_str);
153153
let initial = [
154-
TokenTree::token_alone(
154+
TokenTree::token_fby_space(
155155
token::Literal(token::Lit {
156156
kind: token::LitKind::Str,
157157
symbol: Symbol::intern(&if self.fmt_string.is_empty() {
@@ -166,12 +166,12 @@ impl<'cx, 'a> Context<'cx, 'a> {
166166
}),
167167
self.span,
168168
),
169-
TokenTree::token_alone(token::Comma, self.span),
169+
TokenTree::token_fby_space(token::Comma, self.span),
170170
];
171171
let captures = self.capture_decls.iter().flat_map(|cap| {
172172
[
173-
TokenTree::token_alone(token::Ident(cap.ident.name, false), cap.ident.span),
174-
TokenTree::token_alone(token::Comma, self.span),
173+
TokenTree::token_fby_space(token::Ident(cap.ident.name, false), cap.ident.span),
174+
TokenTree::token_fby_space(token::Comma, self.span),
175175
]
176176
});
177177
self.cx.expr(

0 commit comments

Comments
 (0)