Commit c973cc2

Rollup merge of #65392 - Centril:nt-to-tt, r=Mark-Simulacrum

Move `Nonterminal::to_tokenstream` to parser & don't rely directly on parser in lowering

Split out from #65324. r? @petrochenkov

2 parents: d24da2a + 07e946c

File tree: 5 files changed (+152 -142 lines)

src/librustc/hir/lowering.rs (+12 -2)

@@ -70,7 +70,8 @@ use syntax::print::pprust;
 use syntax::source_map::{respan, ExpnData, ExpnKind, DesugaringKind, Spanned};
 use syntax::symbol::{kw, sym, Symbol};
 use syntax::tokenstream::{TokenStream, TokenTree};
-use syntax::parse::token::{self, Token};
+use syntax::parse::token::{self, Nonterminal, Token};
+use syntax::parse::ParseSess;
 use syntax::visit::{self, Visitor};
 use syntax_pos::Span;

@@ -86,6 +87,11 @@ pub struct LoweringContext<'a> {

     resolver: &'a mut dyn Resolver,

+    /// HACK(Centril): there is a cyclic dependency between the parser and lowering
+    /// if we don't have this function pointer. To avoid that dependency so that
+    /// librustc is independent of the parser, we use dynamic dispatch here.
+    nt_to_tokenstream: NtToTokenstream,
+
     /// The items being lowered are collected here.
     items: BTreeMap<hir::HirId, hir::Item>,

@@ -180,6 +186,8 @@ pub trait Resolver {
     fn has_derives(&self, node_id: NodeId, derives: SpecialDerives) -> bool;
 }

+type NtToTokenstream = fn(&Nonterminal, &ParseSess, Span) -> TokenStream;
+
 /// Context of `impl Trait` in code, which determines whether it is allowed in an HIR subtree,
 /// and if so, what meaning it has.
 #[derive(Debug)]

@@ -236,6 +244,7 @@ pub fn lower_crate(
     dep_graph: &DepGraph,
     krate: &Crate,
     resolver: &mut dyn Resolver,
+    nt_to_tokenstream: NtToTokenstream,
 ) -> hir::Crate {
     // We're constructing the HIR here; we don't care what we will
     // read, since we haven't even constructed the *input* to

@@ -249,6 +258,7 @@
         sess,
         cstore,
         resolver,
+        nt_to_tokenstream,
         items: BTreeMap::new(),
         trait_items: BTreeMap::new(),
         impl_items: BTreeMap::new(),

@@ -1022,7 +1032,7 @@ impl<'a> LoweringContext<'a> {
     fn lower_token(&mut self, token: Token) -> TokenStream {
         match token.kind {
             token::Interpolated(nt) => {
-                let tts = nt.to_tokenstream(&self.sess.parse_sess, token.span);
+                let tts = (self.nt_to_tokenstream)(&nt, &self.sess.parse_sess, token.span);
                 self.lower_token_stream(tts)
             }
             _ => TokenTree::Token(token).into(),
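To make the shape of this change concrete, here is a minimal, self-contained sketch (not rustc code; every name below is an illustrative stand-in) of the pattern the diff above introduces: lowering stores the conversion routine as a plain `fn` pointer, so the crate defining `LoweringContext` needs no compile-time dependency on the parser that implements it.

    // Stand-ins for the real rustc types, for illustration only.
    struct Nonterminal;
    struct ParseSess;
    #[derive(Clone, Copy)]
    struct Span;
    struct TokenStream;

    // Mirrors the `NtToTokenstream` alias added in lowering.rs.
    type NtToTokenstream = fn(&Nonterminal, &ParseSess, Span) -> TokenStream;

    struct LoweringContext {
        nt_to_tokenstream: NtToTokenstream,
    }

    impl LoweringContext {
        fn lower_interpolated(&self, nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
            // The parentheses tell Rust to call the field, not look up a method.
            (self.nt_to_tokenstream)(nt, sess, span)
        }
    }

    // The "parser side" supplies the actual implementation...
    fn nt_to_tokenstream(_nt: &Nonterminal, _sess: &ParseSess, _span: Span) -> TokenStream {
        TokenStream
    }

    fn main() {
        // ...and a driver wires the two sides together, as passes.rs does in the next file.
        let ctx = LoweringContext { nt_to_tokenstream };
        let _tts = ctx.lower_interpolated(&Nonterminal, &ParseSess, Span);
    }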
src/librustc_interface/passes.rs (+2 -1)

@@ -541,7 +541,8 @@ pub fn lower_to_hir(
 ) -> Result<hir::map::Forest> {
     // Lower AST to HIR.
     let hir_forest = time(sess, "lowering AST -> HIR", || {
-        let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver);
+        let nt_to_tokenstream = syntax::parse::nt_to_tokenstream;
+        let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver, nt_to_tokenstream);

         if sess.opts.debugging_opts.hir_stats {
             hir_stats::print_hir_stats(&hir_crate);
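For the wiring above to typecheck, `syntax::parse::nt_to_tokenstream` only has to be a free function with the matching signature: Rust coerces a function item to a function pointer wherever an `fn(...)` type is expected. A tiny illustration (the names here are made up):

    fn double(x: u32) -> u32 { x * 2 }

    fn main() {
        // A function item coerces to a function pointer of matching signature.
        let f: fn(u32) -> u32 = double;
        assert_eq!(f(21), 42);
    }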
src/libsyntax/ext/proc_macro_server.rs (+1 -1)

@@ -175,7 +175,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
             }

             Interpolated(nt) => {
-                let stream = nt.to_tokenstream(sess, span);
+                let stream = parse::nt_to_tokenstream(&nt, sess, span);
                 TokenTree::Group(Group {
                     delimiter: Delimiter::None,
                     stream,
src/libsyntax/parse/mod.rs (+134 -4)

@@ -4,10 +4,9 @@ use crate::ast::{self, CrateConfig, NodeId};
 use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
 use crate::source_map::{SourceMap, FilePathMapping};
 use crate::feature_gate::UnstableFeatures;
-use crate::parse::parser::Parser;
-use crate::parse::parser::emit_unclosed_delims;
-use crate::parse::token::TokenKind;
-use crate::tokenstream::{TokenStream, TokenTree};
+use crate::parse::parser::{Parser, emit_unclosed_delims};
+use crate::parse::token::{Nonterminal, TokenKind};
+use crate::tokenstream::{self, TokenStream, TokenTree};
 use crate::print::pprust;
 use crate::symbol::Symbol;

@@ -24,6 +23,8 @@ use std::borrow::Cow;
 use std::path::{Path, PathBuf};
 use std::str;

+use log::info;
+
 #[cfg(test)]
 mod tests;

@@ -407,3 +408,132 @@ impl SeqSep {
         }
     }
 }
+
+// NOTE(Centril): The following probably shouldn't be here but it acknowledges the
+// fact that architecturally, we are using parsing (read on below to understand why).
+
+pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
+    // A `Nonterminal` is often a parsed AST item. At this point we now
+    // need to convert the parsed AST to an actual token stream, i.e.
+    // un-parse it, basically.
+    //
+    // Unfortunately there's not really a great way to do that in a
+    // guaranteed lossless fashion right now. The fallback here is to just
+    // stringify the AST node and reparse it, but this loses all span
+    // information.
+    //
+    // As a result, some AST nodes are annotated with the token stream they
+    // came from. Here we attempt to extract these lossless token streams
+    // before we fall back to the stringification.
+    let tokens = match *nt {
+        Nonterminal::NtItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtTraitItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtImplItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtIdent(ident, is_raw) => {
+            Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
+        }
+        Nonterminal::NtLifetime(ident) => {
+            Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
+        }
+        Nonterminal::NtTT(ref tt) => {
+            Some(tt.clone().into())
+        }
+        _ => None,
+    };
+
+    // FIXME(#43081): Avoid this pretty-print + reparse hack
+    let source = pprust::nonterminal_to_string(nt);
+    let filename = FileName::macro_expansion_source_code(&source);
+    let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
+
+    // During early phases of the compiler the AST could get modified
+    // directly (e.g., attributes added or removed) and the internal cache
+    // of tokens may not be invalidated or updated. Consequently, if the
+    // "lossless" token stream disagrees with our actual stringification
+    // (which has historically been much more battle-tested) then we go
+    // with the lossy stream anyway (losing span information).
+    //
+    // Note that the comparison isn't `==` here to avoid comparing spans,
+    // but it *also* is a "probable" equality which is a pretty weird
+    // definition. We mostly want to catch actual changes to the AST
+    // like a `#[cfg]` being processed or some weird `macro_rules!`
+    // expansion.
+    //
+    // What we *don't* want to catch is the fact that a user-defined
+    // literal like `0xf` is stringified as `15`, causing the cached token
+    // stream to not be literal `==` token-wise (ignoring spans) to the
+    // token stream we got from stringification.
+    //
+    // Instead the "probably equal" check here is "does each token
+    // recursively have the same discriminant?" We basically don't look at
+    // the token values here and assume that such fine-grained token stream
+    // modifications, including adding/removing typically non-semantic
+    // tokens such as extra braces and commas, don't happen.
+    if let Some(tokens) = tokens {
+        if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
+            return tokens
+        }
+        info!("cached tokens found, but they're not \"probably equal\", \
+               going with stringified version");
+    }
+    return tokens_for_real
+}
+
+fn prepend_attrs(
+    sess: &ParseSess,
+    attrs: &[ast::Attribute],
+    tokens: Option<&tokenstream::TokenStream>,
+    span: syntax_pos::Span
+) -> Option<tokenstream::TokenStream> {
+    let tokens = tokens?;
+    if attrs.len() == 0 {
+        return Some(tokens.clone())
+    }
+    let mut builder = tokenstream::TokenStreamBuilder::new();
+    for attr in attrs {
+        assert_eq!(attr.style, ast::AttrStyle::Outer,
+                   "inner attributes should prevent cached tokens from existing");
+
+        let source = pprust::attribute_to_string(attr);
+        let macro_filename = FileName::macro_expansion_source_code(&source);
+        if attr.is_sugared_doc {
+            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+            builder.push(stream);
+            continue
+        }
+
+        // synthesize # [ $path $tokens ] manually here
+        let mut brackets = tokenstream::TokenStreamBuilder::new();

+        // For simple paths, push the identifier directly
+        if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
+            let ident = attr.path.segments[0].ident;
+            let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
+            brackets.push(tokenstream::TokenTree::token(token, ident.span));
+
+        // ... and for more complicated paths, fall back to a reparse hack that
+        // should eventually be removed.
+        } else {
+            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+            brackets.push(stream);
+        }
+
+        brackets.push(attr.tokens.clone());
+
+        // The spans we list here for `#` and for `[ ... ]` are both wrong in
+        // that they encompass more than each token, but it hopefully is "good
+        // enough" for now at least.
+        builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
+        let delim_span = tokenstream::DelimSpan::from_single(attr.span);
+        builder.push(tokenstream::TokenTree::Delimited(
+            delim_span, token::DelimToken::Bracket, brackets.build().into()));
+    }
+    builder.push(tokens.clone());
+    Some(builder.build())
+}
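The "probably equal" comparison described in the comments above deliberately matches on token *kind* rather than token *value*, so that a cached stream and a reparsed stream still agree when only a payload's rendering differs (the `0xf` vs. `15` case). A toy sketch of that idea, using `std::mem::discriminant` on a made-up token type rather than rustc's real recursive `probably_equal_for_proc_macro`:

    use std::mem::discriminant;

    // An illustrative token kind; rustc's check walks whole token trees.
    enum Tok {
        Literal(String),
        Comma,
    }

    // Same enum variant counts as "probably equal"; payloads are ignored.
    fn probably_equal(a: &Tok, b: &Tok) -> bool {
        discriminant(a) == discriminant(b)
    }

    fn main() {
        let cached = Tok::Literal("0xf".to_string());
        let reparsed = Tok::Literal("15".to_string());
        assert!(probably_equal(&cached, &reparsed));
        assert!(!probably_equal(&cached, &Tok::Comma));
    }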
src/libsyntax/parse/token.rs (+3 -134)

@@ -4,16 +4,13 @@ pub use DelimToken::*;
 pub use LitKind::*;
 pub use TokenKind::*;

-use crate::ast::{self};
-use crate::parse::{parse_stream_from_source_str, ParseSess};
-use crate::print::pprust;
+use crate::ast;
 use crate::ptr::P;
 use crate::symbol::kw;
-use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
+use crate::tokenstream::TokenTree;

 use syntax_pos::symbol::Symbol;
-use syntax_pos::{self, Span, FileName, DUMMY_SP};
-use log::info;
+use syntax_pos::{self, Span, DUMMY_SP};

 use std::fmt;
 use std::mem;

@@ -737,131 +734,3 @@ impl fmt::Debug for Nonterminal {
     }
 }
-
-impl Nonterminal {
-    pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
-        // A `Nonterminal` is often a parsed AST item. At this point we now
-        // need to convert the parsed AST to an actual token stream, i.e.
-        // un-parse it, basically.
-        //
-        // Unfortunately there's not really a great way to do that in a
-        // guaranteed lossless fashion right now. The fallback here is to just
-        // stringify the AST node and reparse it, but this loses all span
-        // information.
-        //
-        // As a result, some AST nodes are annotated with the token stream they
-        // came from. Here we attempt to extract these lossless token streams
-        // before we fall back to the stringification.
-        let tokens = match *self {
-            Nonterminal::NtItem(ref item) => {
-                prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
-            }
-            Nonterminal::NtTraitItem(ref item) => {
-                prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
-            }
-            Nonterminal::NtImplItem(ref item) => {
-                prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
-            }
-            Nonterminal::NtIdent(ident, is_raw) => {
-                Some(TokenTree::token(Ident(ident.name, is_raw), ident.span).into())
-            }
-            Nonterminal::NtLifetime(ident) => {
-                Some(TokenTree::token(Lifetime(ident.name), ident.span).into())
-            }
-            Nonterminal::NtTT(ref tt) => {
-                Some(tt.clone().into())
-            }
-            _ => None,
-        };
-
-        // FIXME(#43081): Avoid this pretty-print + reparse hack
-        let source = pprust::nonterminal_to_string(self);
-        let filename = FileName::macro_expansion_source_code(&source);
-        let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
-
-        // During early phases of the compiler the AST could get modified
-        // directly (e.g., attributes added or removed) and the internal cache
-        // of tokens may not be invalidated or updated. Consequently, if the
-        // "lossless" token stream disagrees with our actual stringification
-        // (which has historically been much more battle-tested) then we go
-        // with the lossy stream anyway (losing span information).
-        //
-        // Note that the comparison isn't `==` here to avoid comparing spans,
-        // but it *also* is a "probable" equality which is a pretty weird
-        // definition. We mostly want to catch actual changes to the AST
-        // like a `#[cfg]` being processed or some weird `macro_rules!`
-        // expansion.
-        //
-        // What we *don't* want to catch is the fact that a user-defined
-        // literal like `0xf` is stringified as `15`, causing the cached token
-        // stream to not be literal `==` token-wise (ignoring spans) to the
-        // token stream we got from stringification.
-        //
-        // Instead the "probably equal" check here is "does each token
-        // recursively have the same discriminant?" We basically don't look at
-        // the token values here and assume that such fine-grained token stream
-        // modifications, including adding/removing typically non-semantic
-        // tokens such as extra braces and commas, don't happen.
-        if let Some(tokens) = tokens {
-            if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
-                return tokens
-            }
-            info!("cached tokens found, but they're not \"probably equal\", \
-                   going with stringified version");
-        }
-        return tokens_for_real
-    }
-}
-
-fn prepend_attrs(sess: &ParseSess,
-                 attrs: &[ast::Attribute],
-                 tokens: Option<&tokenstream::TokenStream>,
-                 span: syntax_pos::Span)
-                 -> Option<tokenstream::TokenStream>
-{
-    let tokens = tokens?;
-    if attrs.len() == 0 {
-        return Some(tokens.clone())
-    }
-    let mut builder = tokenstream::TokenStreamBuilder::new();
-    for attr in attrs {
-        assert_eq!(attr.style, ast::AttrStyle::Outer,
-                   "inner attributes should prevent cached tokens from existing");
-
-        let source = pprust::attribute_to_string(attr);
-        let macro_filename = FileName::macro_expansion_source_code(&source);
-        if attr.is_sugared_doc {
-            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
-            builder.push(stream);
-            continue
-        }
-
-        // synthesize # [ $path $tokens ] manually here
-        let mut brackets = tokenstream::TokenStreamBuilder::new();
-
-        // For simple paths, push the identifier directly
-        if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
-            let ident = attr.path.segments[0].ident;
-            let token = Ident(ident.name, ident.as_str().starts_with("r#"));
-            brackets.push(tokenstream::TokenTree::token(token, ident.span));
-
-        // ... and for more complicated paths, fall back to a reparse hack that
-        // should eventually be removed.
-        } else {
-            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
-            brackets.push(stream);
-        }
-
-        brackets.push(attr.tokens.clone());
-
-        // The spans we list here for `#` and for `[ ... ]` are both wrong in
-        // that they encompass more than each token, but it hopefully is "good
-        // enough" for now at least.
-        builder.push(tokenstream::TokenTree::token(Pound, attr.span));
-        let delim_span = DelimSpan::from_single(attr.span);
-        builder.push(tokenstream::TokenTree::Delimited(
-            delim_span, DelimToken::Bracket, brackets.build().into()));
-    }
-    builder.push(tokens.clone());
-    Some(builder.build())
-}