@@ -4,10 +4,9 @@ use crate::ast::{self, CrateConfig, NodeId};
 use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
 use crate::source_map::{SourceMap, FilePathMapping};
 use crate::feature_gate::UnstableFeatures;
-use crate::parse::parser::Parser;
-use crate::parse::parser::emit_unclosed_delims;
-use crate::parse::token::TokenKind;
-use crate::tokenstream::{TokenStream, TokenTree};
+use crate::parse::parser::{Parser, emit_unclosed_delims};
+use crate::parse::token::{Nonterminal, TokenKind};
+use crate::tokenstream::{self, TokenStream, TokenTree};
 use crate::print::pprust;
 use crate::symbol::Symbol;
@@ -24,6 +23,8 @@ use std::borrow::Cow;
 use std::path::{Path, PathBuf};
 use std::str;
 
+use log::info;
+
 #[cfg(test)]
 mod tests;
@@ -407,3 +408,132 @@ impl SeqSep {
         }
     }
 }
+
+// NOTE(Centril): The following probably shouldn't be here but it acknowledges the
+// fact that architecturally, we are using parsing (read on below to understand why).
+
+pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
+    // A `Nonterminal` is often a parsed AST item. At this point we now
+    // need to convert the parsed AST to an actual token stream, e.g.
+    // un-parse it basically.
+    //
+    // Unfortunately there's not really a great way to do that in a
+    // guaranteed lossless fashion right now. The fallback here is to just
+    // stringify the AST node and reparse it, but this loses all span
+    // information.
+    //
+    // As a result, some AST nodes are annotated with the token stream they
+    // came from. Here we attempt to extract these lossless token streams
+    // before we fall back to the stringification.
+    let tokens = match *nt {
+        Nonterminal::NtItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtTraitItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtImplItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtIdent(ident, is_raw) => {
+            Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
+        }
+        Nonterminal::NtLifetime(ident) => {
+            Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
+        }
+        Nonterminal::NtTT(ref tt) => {
+            Some(tt.clone().into())
+        }
+        _ => None,
+    };
+
+    // FIXME(#43081): Avoid this pretty-print + reparse hack
+    let source = pprust::nonterminal_to_string(nt);
+    let filename = FileName::macro_expansion_source_code(&source);
+    let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
+
+    // During early phases of the compiler the AST could get modified
+    // directly (e.g., attributes added or removed) and the internal cache
+    // of tokens may not be invalidated or updated. Consequently if the
+    // "lossless" token stream disagrees with our actual stringification
+    // (which has historically been much more battle-tested) then we go
+    // with the lossy stream anyway (losing span information).
+    //
+    // Note that the comparison isn't `==` here to avoid comparing spans,
+    // but it *also* is a "probable" equality which is a pretty weird
+    // definition. We mostly want to catch actual changes to the AST
+    // like a `#[cfg]` being processed or some weird `macro_rules!`
+    // expansion.
+    //
+    // What we *don't* want to catch is the fact that a user-defined
+    // literal like `0xf` is stringified as `15`, causing the cached token
+    // stream to not be literal `==` token-wise (ignoring spans) to the
+    // token stream we got from stringification.
+    //
+    // Instead the "probably equal" check here is "does each token
+    // recursively have the same discriminant?" We basically don't look at
+    // the token values here and assume that such fine grained token stream
+    // modifications, including adding/removing typically non-semantic
+    // tokens such as extra braces and commas, don't happen.
+    if let Some(tokens) = tokens {
+        if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
+            return tokens
+        }
+        info!("cached tokens found, but they're not \"probably equal\", \
+               going with stringified version");
+    }
+    return tokens_for_real
+}
+
+fn prepend_attrs(
+    sess: &ParseSess,
+    attrs: &[ast::Attribute],
+    tokens: Option<&tokenstream::TokenStream>,
+    span: syntax_pos::Span
+) -> Option<tokenstream::TokenStream> {
+    let tokens = tokens?;
+    if attrs.len() == 0 {
+        return Some(tokens.clone())
+    }
+    let mut builder = tokenstream::TokenStreamBuilder::new();
+    for attr in attrs {
+        assert_eq!(attr.style, ast::AttrStyle::Outer,
+                   "inner attributes should prevent cached tokens from existing");
+
+        let source = pprust::attribute_to_string(attr);
+        let macro_filename = FileName::macro_expansion_source_code(&source);
+        if attr.is_sugared_doc {
+            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+            builder.push(stream);
+            continue
+        }
+
+        // synthesize # [ $path $tokens ] manually here
+        let mut brackets = tokenstream::TokenStreamBuilder::new();
+
+        // For simple paths, push the identifier directly
+        if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
+            let ident = attr.path.segments[0].ident;
+            let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
+            brackets.push(tokenstream::TokenTree::token(token, ident.span));
+
+        // ... and for more complicated paths, fall back to a reparse hack that
+        // should eventually be removed.
+        } else {
+            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+            brackets.push(stream);
+        }
+
+        brackets.push(attr.tokens.clone());
+
+        // The spans we list here for `#` and for `[ ... ]` are both wrong in
+        // that they encompass more than each token, but it hopefully is "good
+        // enough" for now at least.
+        builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
+        let delim_span = tokenstream::DelimSpan::from_single(attr.span);
+        builder.push(tokenstream::TokenTree::Delimited(
+            delim_span, token::DelimToken::Bracket, brackets.build().into()));
+    }
+    builder.push(tokens.clone());
+    Some(builder.build())
+}
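To make the "probably equal" comparison concrete, here is a minimal standalone sketch of the idea: tokens are compared by discriminant only, so a cached `0xf` and a reparsed `15` still count as equal. The `Tok` enum and `probably_equal` function are illustrative stand-ins, not rustc's actual `TokenKind` or `probably_equal_for_proc_macro` (which also ignores spans and recurses into delimited groups).

```rust
use std::mem::discriminant;

// Illustrative token type; a stand-in for rustc's `TokenKind`.
#[derive(Debug)]
enum Tok {
    Ident(String),
    Lit(String),
    Comma,
}

// Compare two token sequences by discriminant only, ignoring payloads,
// in the spirit of the "probably equal" check in the diff above.
fn probably_equal(a: &[Tok], b: &[Tok]) -> bool {
    a.len() == b.len()
        && a.iter().zip(b).all(|(x, y)| discriminant(x) == discriminant(y))
}

fn main() {
    // The cached tokens say `0xf`; the pretty-print + reparse path says `15`.
    let cached = [Tok::Ident("x".into()), Tok::Comma, Tok::Lit("0xf".into())];
    let reparsed = [Tok::Ident("x".into()), Tok::Comma, Tok::Lit("15".into())];
    assert!(probably_equal(&cached, &reparsed)); // same shape, different payloads
}
```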
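Similarly, a sketch of the `# [ $path $tokens ]` shape that `prepend_attrs` synthesizes for a non-doc attribute, using toy `Tree`/`Delim` types rather than rustc's `TokenTree` and `DelimToken`:

```rust
// Toy token-tree types standing in for rustc's `TokenTree`/`DelimToken`.
#[derive(Debug)]
enum Delim { Bracket, Paren }

#[derive(Debug)]
enum Tree {
    Pound,
    Ident(&'static str),
    Delimited(Delim, Vec<Tree>),
}

// Synthesize `#[derive(Debug)]`: a `#` token, then a bracket-delimited
// group containing the attribute path followed by the attribute's own
// argument tokens, mirroring the `# [ $path $tokens ]` comment above.
fn synthesize_attr() -> Vec<Tree> {
    let path = Tree::Ident("derive");                                     // simple one-segment path
    let args = Tree::Delimited(Delim::Paren, vec![Tree::Ident("Debug")]); // the `attr.tokens` part
    vec![Tree::Pound, Tree::Delimited(Delim::Bracket, vec![path, args])]
}

fn main() {
    println!("{:?}", synthesize_attr());
}
```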