
Commit e482c86

Auto merge of #73084 - Aaron1011:feature/new-recursive-expand, r=petrochenkov
Re-land PR #72388: Recursively expand `TokenKind::Interpolated` in `probably_equal_for_proc_macro`

PR #72388 allowed us to preserve the original `TokenStream` in more cases during proc-macro expansion, but had to be reverted due to a large number of regressions (see #72545 and #72622). These regressions fell into two categories:

1. Missing handling for `Group`s with `Delimiter::None`, which are inserted during `macro_rules!` expansion (but are lost during stringification and re-parsing). A large number of these regressions were due to `syn` and `proc-macro-hack`, but several crates needed changes to their own proc-macro code.
2. Legitimate hygiene issues that were previously being masked by stringification. Some of these were relatively benign (e.g. [a compilation error](paritytech/parity-scale-codec#210) caused by misusing `quote_spanned!`). However, two crates had intentionally written unhygienic `macro_rules!` macros, which were able to access identifiers that were not passed as arguments (see #72622 (comment)).

All but one of the Crater regressions have now been fixed upstream (see https://hackmd.io/ItrXWRaSSquVwoJATPx3PQ?both). The remaining crate (which has a PR pending at sammhicks/face-generator#1) is not on `crates.io`, and is a Yew application that seems unlikely to have any reverse dependencies.

As @petrochenkov mentioned in #72545 (comment), not re-landing PR #72388 allows more crates to write unhygienic `macro_rules!` macros, which will eventually stop compiling. Since only one Crater regression remains, and additional crates could write unhygienic `macro_rules!` macros in the time it would take that PR to be merged, it makes sense to re-land this change now.
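To make the first regression category concrete, the following is a minimal sketch (not code from this commit) of the kind of change affected proc-macro crates had to make on their side: accept an identifier that now arrives wrapped in an invisible `Delimiter::None` group as well as a bare `Ident`. The attribute name `my_attr` is hypothetical, and the sketch assumes it lives in a proc-macro crate (`proc-macro = true` in Cargo.toml).

```rust
extern crate proc_macro;
use proc_macro::{Delimiter, Ident, TokenStream, TokenTree};

// Unwrap an identifier that may be hidden inside a `None`-delimited group,
// which is how `$name:ident` fragments from `macro_rules!` can now appear.
fn as_ident(tree: &TokenTree) -> Option<Ident> {
    match tree {
        TokenTree::Ident(ident) => Some(ident.clone()),
        TokenTree::Group(group) if group.delimiter() == Delimiter::None => {
            let mut inner = group.stream().into_iter();
            match (inner.next(), inner.next()) {
                (Some(TokenTree::Ident(ident)), None) => Some(ident),
                _ => None,
            }
        }
        _ => None,
    }
}

#[proc_macro_attribute]
pub fn my_attr(_attr: TokenStream, item: TokenStream) -> TokenStream {
    // Report the first identifier-like token of the annotated item, whether
    // or not it is wrapped in an invisible group.
    if let Some(ident) = item.clone().into_iter().next().as_ref().and_then(as_ident) {
        println!("first identifier: {}", ident);
    }
    item
}
```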
2 parents 663d2f5 + 0fcad9c commit e482c86

File tree

15 files changed (+255 -83 lines)


src/librustc_ast/token.rs

+28 -1

@@ -11,9 +11,11 @@ use crate::tokenstream::TokenTree;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::Lrc;
 use rustc_macros::HashStable_Generic;
+use rustc_span::hygiene::ExpnKind;
+use rustc_span::source_map::SourceMap;
 use rustc_span::symbol::{kw, sym};
 use rustc_span::symbol::{Ident, Symbol};
-use rustc_span::{self, Span, DUMMY_SP};
+use rustc_span::{self, FileName, RealFileName, Span, DUMMY_SP};
 use std::borrow::Cow;
 use std::{fmt, mem};

@@ -808,6 +810,31 @@ impl Nonterminal {
         }
         false
     }
+
+    // See issue #74616 for details
+    pub fn ident_name_compatibility_hack(
+        &self,
+        orig_span: Span,
+        source_map: &SourceMap,
+    ) -> Option<(Ident, bool)> {
+        if let NtIdent(ident, is_raw) = self {
+            if let ExpnKind::Macro(_, macro_name) = orig_span.ctxt().outer_expn_data().kind {
+                let filename = source_map.span_to_filename(orig_span);
+                if let FileName::Real(RealFileName::Named(path)) = filename {
+                    if (path.ends_with("time-macros-impl/src/lib.rs")
+                        && macro_name == sym::impl_macros)
+                        || (path.ends_with("js-sys/src/lib.rs") && macro_name == sym::arrays)
+                    {
+                        let snippet = source_map.span_to_snippet(orig_span);
+                        if snippet.as_deref() == Ok("$name") {
+                            return Some((*ident, *is_raw));
+                        }
+                    }
+                }
+            }
+        }
+        None
+    }
 }

 impl PartialEq for Nonterminal {

src/librustc_expand/proc_macro_server.rs

+13 -7

@@ -173,13 +173,19 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
             }

             Interpolated(nt) => {
-                let stream = nt_to_tokenstream(&nt, sess, span);
-                TokenTree::Group(Group {
-                    delimiter: Delimiter::None,
-                    stream,
-                    span: DelimSpan::from_single(span),
-                    flatten: nt.pretty_printing_compatibility_hack(),
-                })
+                if let Some((name, is_raw)) =
+                    nt.ident_name_compatibility_hack(span, sess.source_map())
+                {
+                    TokenTree::Ident(Ident::new(sess, name.name, is_raw, name.span))
+                } else {
+                    let stream = nt_to_tokenstream(&nt, sess, span);
+                    TokenTree::Group(Group {
+                        delimiter: Delimiter::None,
+                        stream,
+                        span: DelimSpan::from_single(span),
+                        flatten: nt.pretty_printing_compatibility_hack(),
+                    })
+                }
             }

             OpenDelim(..) | CloseDelim(..) => unreachable!(),
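The `else` branch above is what wraps an interpolated fragment in a `Delimiter::None` group. Under the old stringify-and-reparse behaviour that wrapper was invisible, which is exactly what the whitelisted crates relied on and why the hack emits a plain `Ident` for them. A small standalone demonstration of that invisibility, assuming the `proc-macro2` crate as a dependency (this snippet is not part of the commit):

```rust
use proc_macro2::{Delimiter, Group, Ident, Span, TokenStream, TokenTree};

fn main() {
    // Wrap the identifier `Foo` in an invisible (`None`-delimited) group,
    // the way interpolated `$name:ident` fragments are surfaced above.
    let ident = TokenTree::Ident(Ident::new("Foo", Span::call_site()));
    let group = TokenTree::Group(Group::new(Delimiter::None, ident.into()));
    let stream: TokenStream = std::iter::once(group).collect();

    // The printed form has no trace of the invisible delimiter...
    let printed = stream.to_string();
    println!("printed: {}", printed); // expected to print just `Foo`

    // ...so re-parsing yields a bare `Ident`, not a `Group`.
    let reparsed: TokenStream = printed.parse().unwrap();
    match reparsed.into_iter().next().unwrap() {
        TokenTree::Ident(_) => println!("reparsed as a plain Ident"),
        other => println!("reparsed as {:?}", other),
    }
}
```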

src/librustc_parse/lib.rs

+40 -13

@@ -7,8 +7,8 @@
 #![feature(or_patterns)]

 use rustc_ast as ast;
-use rustc_ast::token::{self, DelimToken, Nonterminal, Token};
-use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
+use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
+use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree};
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::{Diagnostic, FatalError, Level, PResult};

@@ -309,7 +309,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
     // modifications, including adding/removing typically non-semantic
     // tokens such as extra braces and commas, don't happen.
     if let Some(tokens) = tokens {
-        if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real) {
+        if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) {
             return tokens;
         }
         info!(

@@ -327,7 +327,11 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
 //
 // This is otherwise the same as `eq_unspanned`, only recursing with a
 // different method.
-pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &TokenStream) -> bool {
+pub fn tokenstream_probably_equal_for_proc_macro(
+    first: &TokenStream,
+    other: &TokenStream,
+    sess: &ParseSess,
+) -> bool {
     // When checking for `probably_eq`, we ignore certain tokens that aren't
     // preserved in the AST. Because they are not preserved, the pretty
     // printer arbitrarily adds or removes them when printing as token
@@ -408,20 +412,39 @@ pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &To
                 }
             }
             token_trees = out.into_iter().map(TokenTree::Token).collect();
-            if token_trees.len() != 1 {
-                debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
-            }
         } else {
             token_trees = SmallVec::new();
             token_trees.push(tree);
         }
         token_trees.into_iter()
     }

-    let mut t1 = first.trees().filter(semantic_tree).flat_map(break_tokens);
-    let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
+    let expand_nt = |tree: TokenTree| {
+        if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree {
+            // When checking tokenstreams for 'probable equality', we are comparing
+            // a captured (from parsing) `TokenStream` to a reparsed tokenstream.
+            // The reparsed tokenstream will never have `None`-delimited groups,
+            // since they are only ever inserted as a result of macro expansion.
+            // Therefore, inserting a `None`-delimited group here (when we
+            // convert a nested `Nonterminal` to a tokenstream) would cause
+            // a mismatch with the reparsed tokenstream.
+            //
+            // Note that we currently do not handle the case where the
+            // reparsed stream has a `Parenthesis`-delimited group
+            // inserted. This will cause a spurious mismatch:
+            // issue #75734 tracks resolving this.
+            nt_to_tokenstream(nt, sess, *span).into_trees()
+        } else {
+            TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees()
+        }
+    };
+
+    // Break tokens after we expand any nonterminals, so that we break tokens
+    // that are produced as a result of nonterminal expansion.
+    let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
+    let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
     for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
-        if !tokentree_probably_equal_for_proc_macro(&t1, &t2) {
+        if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) {
             return false;
         }
     }
@@ -433,13 +456,17 @@ pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &To
 //
 // This is otherwise the same as `eq_unspanned`, only recursing with a
 // different method.
-fn tokentree_probably_equal_for_proc_macro(first: &TokenTree, other: &TokenTree) -> bool {
+pub fn tokentree_probably_equal_for_proc_macro(
+    first: &TokenTree,
+    other: &TokenTree,
+    sess: &ParseSess,
+) -> bool {
     match (first, other) {
         (TokenTree::Token(token), TokenTree::Token(token2)) => {
             token_probably_equal_for_proc_macro(token, token2)
         }
         (TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
-            delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2)
+            delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess)
         }
         _ => false,
     }

@@ -498,7 +525,7 @@ fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
             b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
         }

-        (&Interpolated(..), &Interpolated(..)) => false,
+        (&Interpolated(..), &Interpolated(..)) => panic!("Unexpanded Interpolated!"),

         _ => panic!("forgot to add a token?"),
     }
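For intuition about the `expand_nt` closure added above, here is a self-contained toy model (deliberately not using rustc's internal types) of why interpolated nonterminals have to be flattened before the captured and reparsed streams are compared; it also shows why, after this change, two `Interpolated` tokens should never meet in `token_probably_equal_for_proc_macro` (hence the new `panic!`).

```rust
// Toy stand-in for a token: either a plain token or an interpolated
// nonterminal that carries a whole captured fragment.
#[derive(Clone, PartialEq)]
enum TokenModel {
    Plain(String),
    Interpolated(Vec<TokenModel>),
}

// Flatten interpolated placeholders into their underlying tokens, mirroring
// what the `expand_nt` closure does via `nt_to_tokenstream`.
fn expand(stream: &[TokenModel]) -> Vec<TokenModel> {
    let mut out = Vec::new();
    for tok in stream {
        match tok {
            TokenModel::Interpolated(inner) => out.extend(expand(inner)),
            TokenModel::Plain(text) => out.push(TokenModel::Plain(text.clone())),
        }
    }
    out
}

fn probably_equal(first: &[TokenModel], other: &[TokenModel]) -> bool {
    expand(first) == expand(other)
}

fn main() {
    // Captured during parsing: `$name` was matched as an `ident` fragment.
    let captured = vec![
        TokenModel::Plain("struct".into()),
        TokenModel::Interpolated(vec![TokenModel::Plain("Foo".into())]),
        TokenModel::Plain(";".into()),
    ];
    // Reparsed from the pretty-printed source: no interpolated tokens remain.
    let reparsed = vec![
        TokenModel::Plain("struct".into()),
        TokenModel::Plain("Foo".into()),
        TokenModel::Plain(";".into()),
    ];
    // Without the expansion step these streams would compare unequal.
    assert!(probably_equal(&captured, &reparsed));
    println!("captured and reparsed streams match after expansion");
}
```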

src/librustc_span/symbol.rs

+2

@@ -258,6 +258,7 @@ symbols! {
         arith_offset,
         arm_target_feature,
         array,
+        arrays,
         as_str,
         asm,
         assert,

@@ -572,6 +573,7 @@ symbols! {
         ignore,
         impl_header_lifetime_elision,
         impl_lint_pass,
+        impl_macros,
         impl_trait_in_bindings,
         import_shadowing,
         in_band_lifetimes,

@@ -0,0 +1,13 @@
+// force-host
+// no-prefer-dynamic
+
+#![crate_type = "proc-macro"]
+
+extern crate proc_macro;
+use proc_macro::TokenStream;
+
+#[proc_macro_attribute]
+pub fn my_macro(_attr: TokenStream, input: TokenStream) -> TokenStream {
+    println!("Called proc_macro_hack with {:?}", input);
+    input
+}

@@ -0,0 +1,30 @@
+// check-pass
+// aux-build:group-compat-hack.rs
+// compile-flags: -Z span-debug
+
+#![no_std] // Don't load unnecessary hygiene information from std
+extern crate std;
+
+#[macro_use] extern crate group_compat_hack;
+
+// Tests the backwards compatibility hack added for certain macros
+// When an attribute macro named `proc_macro_hack` or `wasm_bindgen`
+// has an `NtIdent` named `$name`, we pass a plain `Ident` token in
+// place of a `None`-delimited group. This allows us to maintain
+// backwards compatibility for older versions of these crates.
+
+include!("js-sys/src/lib.rs");
+include!("time-macros-impl/src/lib.rs");
+
+macro_rules! other {
+    ($name:ident) => {
+        #[my_macro] struct Three($name);
+    }
+}
+
+fn main() {
+    struct Foo;
+    impl_macros!(Foo);
+    arrays!(Foo);
+    other!(Foo);
+}

@@ -0,0 +1,3 @@
+Called proc_macro_hack with TokenStream [Ident { ident: "struct", span: $DIR/time-macros-impl/src/lib.rs:5:21: 5:27 (#5) }, Ident { ident: "One", span: $DIR/time-macros-impl/src/lib.rs:5:28: 5:31 (#5) }, Group { delimiter: Parenthesis, stream: TokenStream [Ident { ident: "Foo", span: $DIR/group-compat-hack.rs:27:18: 27:21 (#0) }], span: $DIR/time-macros-impl/src/lib.rs:5:31: 5:38 (#5) }, Punct { ch: ';', spacing: Alone, span: $DIR/time-macros-impl/src/lib.rs:5:38: 5:39 (#5) }]
+Called proc_macro_hack with TokenStream [Ident { ident: "struct", span: $DIR/js-sys/src/lib.rs:5:21: 5:27 (#9) }, Ident { ident: "Two", span: $DIR/js-sys/src/lib.rs:5:28: 5:31 (#9) }, Group { delimiter: Parenthesis, stream: TokenStream [Ident { ident: "Foo", span: $DIR/group-compat-hack.rs:28:13: 28:16 (#0) }], span: $DIR/js-sys/src/lib.rs:5:31: 5:38 (#9) }, Punct { ch: ';', spacing: Alone, span: $DIR/js-sys/src/lib.rs:5:38: 5:39 (#9) }]
+Called proc_macro_hack with TokenStream [Ident { ident: "struct", span: $DIR/group-compat-hack.rs:21:21: 21:27 (#13) }, Ident { ident: "Three", span: $DIR/group-compat-hack.rs:21:28: 21:33 (#13) }, Group { delimiter: Parenthesis, stream: TokenStream [Group { delimiter: None, stream: TokenStream [Ident { ident: "Foo", span: $DIR/group-compat-hack.rs:29:12: 29:15 (#0) }], span: $DIR/group-compat-hack.rs:21:34: 21:39 (#13) }], span: $DIR/group-compat-hack.rs:21:33: 21:40 (#13) }, Punct { ch: ';', spacing: Alone, span: $DIR/group-compat-hack.rs:21:40: 21:41 (#13) }]

@@ -0,0 +1,7 @@
+// ignore-test this is not a test
+
+macro_rules! arrays {
+    ($name:ident) => {
+        #[my_macro] struct Two($name);
+    }
+}

@@ -0,0 +1,7 @@
+// ignore-test this is not a test
+
+macro_rules! impl_macros {
+    ($name:ident) => {
+        #[my_macro] struct One($name);
+    }
+}

src/test/ui/proc-macro/input-interpolated.stdout

+26 -14

@@ -15,51 +15,63 @@ PRINT-ATTR INPUT (DISPLAY): const A : u8 = 0 ;
 PRINT-ATTR INPUT (DEBUG): TokenStream [
     Ident {
         ident: "const",
-        span: #0 bytes(0..0),
+        span: #3 bytes(416..421),
     },
-    Ident {
-        ident: "A",
-        span: #0 bytes(0..0),
+    Group {
+        delimiter: None,
+        stream: TokenStream [
+            Ident {
+                ident: "A",
+                span: #0 bytes(503..504),
+            },
+        ],
+        span: #3 bytes(422..424),
     },
     Punct {
         ch: ':',
         spacing: Alone,
-        span: #0 bytes(0..0),
+        span: #3 bytes(424..425),
     },
     Ident {
         ident: "u8",
-        span: #0 bytes(0..0),
+        span: #3 bytes(426..428),
     },
     Punct {
         ch: '=',
         spacing: Alone,
-        span: #0 bytes(0..0),
+        span: #3 bytes(429..430),
     },
     Literal {
         kind: Integer,
         symbol: "0",
         suffix: None,
-        span: #0 bytes(0..0),
+        span: #3 bytes(431..432),
    },
     Punct {
         ch: ';',
         spacing: Alone,
-        span: #0 bytes(0..0),
+        span: #3 bytes(432..433),
     },
 ]
 PRINT-DERIVE INPUT (DISPLAY): struct A { }
 PRINT-DERIVE INPUT (DEBUG): TokenStream [
     Ident {
         ident: "struct",
-        span: #0 bytes(0..0),
+        span: #3 bytes(468..474),
     },
-    Ident {
-        ident: "A",
-        span: #0 bytes(0..0),
+    Group {
+        delimiter: None,
+        stream: TokenStream [
+            Ident {
+                ident: "A",
+                span: #0 bytes(503..504),
+            },
+        ],
+        span: #3 bytes(475..477),
     },
     Group {
         delimiter: Brace,
         stream: TokenStream [],
-        span: #0 bytes(0..0),
+        span: #3 bytes(478..480),
     },
 ]

src/test/ui/proc-macro/macro-rules-derive.rs

+2 -3

@@ -1,14 +1,13 @@
 // aux-build:first-second.rs
-// FIXME: The spans here are bad, see PR #73084

 extern crate first_second;
 use first_second::*;

 macro_rules! produce_it {
     ($name:ident) => {
-        #[first] //~ ERROR cannot find type
+        #[first]
         struct $name {
-            field: MissingType
+            field: MissingType //~ ERROR cannot find type
         }
     }
 }

src/test/ui/proc-macro/macro-rules-derive.stderr

+8 -3

@@ -1,8 +1,13 @@
 error[E0412]: cannot find type `MissingType` in this scope
-  --> $DIR/macro-rules-derive.rs:9:9
+  --> $DIR/macro-rules-derive.rs:10:20
   |
-LL |         #[first]
-   |         ^^^^^^^^ not found in this scope
+LL |             field: MissingType
+   |                    ^^^^^^^^^^^ not found in this scope
+...
+LL | produce_it!(MyName);
+   | -------------------- in this macro invocation
+   |
+   = note: this error originates in a macro (in Nightly builds, run with -Z macro-backtrace for more info)

 error: aborting due to previous error
