Auto merge of #127516 - nnethercote:simplify-LazyAttrTokenStream, r=petrochenkov

bors · bors · commit f242d6c26cc6 · 2025-04-30T00:09:21.000Z
Simplify `LazyAttrTokenStream`

`LazyAttrTokenStream` is an unpleasant type: `Lrc&lt;Box&lt;dyn ToAttrTokenStream&gt;&gt;`. Why does it look like that?
- There are two `ToAttrTokenStream` impls, one for the lazy case, and one for the case where we already have an `AttrTokenStream`.
- The lazy case (`LazyAttrTokenStreamImpl`) is implemented in `rustc_parse`, but `LazyAttrTokenStream` is defined in `rustc_ast`, which does not depend on `rustc_parse`. The use of the trait lets `rustc_ast` implicitly depend on `rustc_parse`. This explains the `dyn`.
- `LazyAttrTokenStream` must have a `size_of` as small as possible, because it's used in many AST nodes. This explains the `Lrc&lt;Box&lt;_&gt;&gt;`, which keeps it to one word. (It's required `Lrc&lt;dyn _&gt;` would be a fat pointer.)

This PR moves `LazyAttrTokenStreamImpl` (and a few other token stream things) from `rustc_parse` to `rustc_ast`. This lets us replace the `ToAttrTokenStream` trait with a two-variant enum and also remove the `Box`, changing `LazyAttrTokenStream` to `Lrc&lt;LazyAttrTokenStreamInner&gt;`. Plus it does a few cleanups.

r? `@petrochenkov`
diff --git a/compiler/rustc_ast/src/lib.rs b/compiler/rustc_ast/src/lib.rs
@@ -12,13 +12,15 @@
     test(attr(deny(warnings)))
 )]
 #![doc(rust_logo)]
+#![feature(array_windows)]
 #![feature(associated_type_defaults)]
 #![feature(box_patterns)]
 #![feature(if_let_guard)]
 #![feature(negative_impls)]
 #![feature(never_type)]
 #![feature(rustdoc_internals)]
 #![feature(stmt_expr_attributes)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 pub mod util {
diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs
@@ -836,7 +836,7 @@ fn visit_lazy_tts_opt_mut<T: MutVisitor>(vis: &mut T, lazy_tts: Option<&mut Lazy
         if let Some(lazy_tts) = lazy_tts {
             let mut tts = lazy_tts.to_attr_token_stream();
             visit_attr_tts(vis, &mut tts);
-            *lazy_tts = LazyAttrTokenStream::new(tts);
+            *lazy_tts = LazyAttrTokenStream::new_direct(tts);
         }
     }
 }
diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs
diff --git a/compiler/rustc_attr_parsing/src/lib.rs b/compiler/rustc_attr_parsing/src/lib.rs
@@ -80,6 +80,7 @@
 #![cfg_attr(bootstrap, feature(let_chains))]
 #![doc(rust_logo)]
 #![feature(rustdoc_internals)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 #[macro_use]
diff --git a/compiler/rustc_builtin_macros/src/lib.rs b/compiler/rustc_builtin_macros/src/lib.rs
@@ -18,6 +18,7 @@
 #![feature(rustdoc_internals)]
 #![feature(string_from_utf8_lossy_owned)]
 #![feature(try_blocks)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 extern crate proc_macro;
diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs
@@ -14,6 +14,7 @@
 #![feature(string_from_utf8_lossy_owned)]
 #![feature(trait_alias)]
 #![feature(try_blocks)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 //! This crate contains codegen code that is used by all codegen backends (LLVM and others).
diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs
@@ -162,7 +162,7 @@ pub(crate) fn attr_into_trace(mut attr: Attribute, trace_name: Symbol) -> Attrib
             let NormalAttr { item, tokens } = &mut **normal;
             item.path.segments[0].ident.name = trace_name;
             // This makes the trace attributes unobservable to token-based proc macros.
-            *tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::default()));
+            *tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::default()));
         }
         AttrKind::DocComment(..) => unreachable!(),
     }
@@ -192,7 +192,7 @@ impl<'a> StripUnconfigured<'a> {
         if self.config_tokens {
             if let Some(Some(tokens)) = node.tokens_mut() {
                 let attr_stream = tokens.to_attr_token_stream();
-                *tokens = LazyAttrTokenStream::new(self.configure_tokens(&attr_stream));
+                *tokens = LazyAttrTokenStream::new_direct(self.configure_tokens(&attr_stream));
             }
         }
     }
@@ -223,7 +223,7 @@ impl<'a> StripUnconfigured<'a> {
                     target.attrs.flat_map_in_place(|attr| self.process_cfg_attr(&attr));
 
                     if self.in_cfg(&target.attrs) {
-                        target.tokens = LazyAttrTokenStream::new(
+                        target.tokens = LazyAttrTokenStream::new_direct(
                             self.configure_tokens(&target.tokens.to_attr_token_stream()),
                         );
                         Some(AttrTokenTree::AttrsTarget(target))
@@ -361,7 +361,7 @@ impl<'a> StripUnconfigured<'a> {
                 .to_attr_token_stream(),
         ));
 
-        let tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::new(trees)));
+        let tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::new(trees)));
         let attr = ast::attr::mk_attr_from_item(
             &self.sess.psess.attr_id_generator,
             item,
diff --git a/compiler/rustc_hir/src/lib.rs b/compiler/rustc_hir/src/lib.rs
@@ -14,6 +14,7 @@
 #![feature(never_type)]
 #![feature(rustc_attrs)]
 #![feature(variant_count)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 extern crate self as rustc_hir;
diff --git a/compiler/rustc_middle/src/lib.rs b/compiler/rustc_middle/src/lib.rs
@@ -61,6 +61,7 @@
 #![feature(try_trait_v2_yeet)]
 #![feature(type_alias_impl_trait)]
 #![feature(yeet_expr)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 #[cfg(test)]
diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs
@@ -5,13 +5,13 @@
 #![allow(rustc::diagnostic_outside_of_impl)]
 #![allow(rustc::untranslatable_diagnostic)]
 #![cfg_attr(bootstrap, feature(let_chains))]
-#![feature(array_windows)]
 #![feature(assert_matches)]
 #![feature(box_patterns)]
 #![feature(debug_closure_helpers)]
 #![feature(if_let_guard)]
 #![feature(iter_intersperse)]
 #![feature(string_from_utf8_lossy_owned)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 use std::path::{Path, PathBuf};
diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs
@@ -1,5 +1,6 @@
 use rustc_ast as ast;
 use rustc_ast::token::{self, MetaVarKind};
+use rustc_ast::tokenstream::ParserRange;
 use rustc_ast::{Attribute, attr};
 use rustc_errors::codes::*;
 use rustc_errors::{Diag, PResult};
@@ -8,8 +9,7 @@ use thin_vec::ThinVec;
 use tracing::debug;
 
 use super::{
-    AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing,
-    UsePreAttrPos,
+    AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle, Trailing, UsePreAttrPos,
 };
 use crate::{errors, exp, fluent_generated as fluent};
 
diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -1,21 +1,18 @@
 use std::borrow::Cow;
-use std::{iter, mem};
+use std::mem;
 
-use rustc_ast::token::{Delimiter, Token};
+use rustc_ast::token::Token;
 use rustc_ast::tokenstream::{
-    AttrTokenStream, AttrTokenTree, AttrsTarget, DelimSpacing, DelimSpan, LazyAttrTokenStream,
-    Spacing, ToAttrTokenStream,
+    AttrsTarget, LazyAttrTokenStream, NodeRange, ParserRange, Spacing, TokenCursor,
 };
 use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_errors::PResult;
 use rustc_session::parse::ParseSess;
-use rustc_span::{DUMMY_SP, Span, sym};
+use rustc_span::{DUMMY_SP, sym};
+use thin_vec::ThinVec;
 
-use super::{
-    Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange,
-    TokenCursor, Trailing,
-};
+use super::{Capturing, ForceCollect, Parser, Trailing};
 
 // When collecting tokens, this fully captures the start point. Usually its
 // just after outer attributes, but occasionally it's before.
@@ -94,95 +91,6 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
     })
 }
 
-// From a value of this type we can reconstruct the `TokenStream` seen by the
-// `f` callback passed to a call to `Parser::collect_tokens`, by
-// replaying the getting of the tokens. This saves us producing a `TokenStream`
-// if it is never needed, e.g. a captured `macro_rules!` argument that is never
-// passed to a proc macro. In practice, token stream creation happens rarely
-// compared to calls to `collect_tokens` (see some statistics in #78736) so we
-// are doing as little up-front work as possible.
-//
-// This also makes `Parser` very cheap to clone, since
-// there is no intermediate collection buffer to clone.
-struct LazyAttrTokenStreamImpl {
-    start_token: (Token, Spacing),
-    cursor_snapshot: TokenCursor,
-    num_calls: u32,
-    break_last_token: u32,
-    node_replacements: Box<[NodeReplacement]>,
-}
-
-impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
-    fn to_attr_token_stream(&self) -> AttrTokenStream {
-        // The token produced by the final call to `{,inlined_}next` was not
-        // actually consumed by the callback. The combination of chaining the
-        // initial token and using `take` produces the desired result - we
-        // produce an empty `TokenStream` if no calls were made, and omit the
-        // final token otherwise.
-        let mut cursor_snapshot = self.cursor_snapshot.clone();
-        let tokens = iter::once(FlatToken::Token(self.start_token))
-            .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
-            .take(self.num_calls as usize);
-
-        if self.node_replacements.is_empty() {
-            make_attr_token_stream(tokens, self.break_last_token)
-        } else {
-            let mut tokens: Vec<_> = tokens.collect();
-            let mut node_replacements = self.node_replacements.to_vec();
-            node_replacements.sort_by_key(|(range, _)| range.0.start);
-
-            #[cfg(debug_assertions)]
-            for [(node_range, tokens), (next_node_range, next_tokens)] in
-                node_replacements.array_windows()
-            {
-                assert!(
-                    node_range.0.end <= next_node_range.0.start
-                        || node_range.0.end >= next_node_range.0.end,
-                    "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
-                    node_range,
-                    tokens,
-                    next_node_range,
-                    next_tokens,
-                );
-            }
-
-            // Process the replace ranges, starting from the highest start
-            // position and working our way back. If have tokens like:
-            //
-            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
-            //
-            // Then we will generate replace ranges for both
-            // the `#[cfg(FALSE)] field: bool` and the entire
-            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
-            //
-            // By starting processing from the replace range with the greatest
-            // start position, we ensure that any (outer) replace range which
-            // encloses another (inner) replace range will fully overwrite the
-            // inner range's replacement.
-            for (node_range, target) in node_replacements.into_iter().rev() {
-                assert!(
-                    !node_range.0.is_empty(),
-                    "Cannot replace an empty node range: {:?}",
-                    node_range.0
-                );
-
-                // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus
-                // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the
-                // total length of `tokens` constant throughout the replacement process, allowing
-                // us to do all replacements without adjusting indices.
-                let target_len = target.is_some() as usize;
-                tokens.splice(
-                    (node_range.0.start as usize)..(node_range.0.end as usize),
-                    target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
-                        iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len),
-                    ),
-                );
-            }
-            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
-        }
-    }
-}
-
 impl<'a> Parser<'a> {
     pub(super) fn collect_pos(&self) -> CollectPos {
         CollectPos {
@@ -387,10 +295,10 @@ impl<'a> Parser<'a> {
 
         // This is hot enough for `deep-vector` that checking the conditions for an empty iterator
         // is measurably faster than actually executing the iterator.
-        let node_replacements: Box<[_]> = if parser_replacements_start == parser_replacements_end
+        let node_replacements = if parser_replacements_start == parser_replacements_end
             && inner_attr_parser_replacements.is_empty()
         {
-            Box::new([])
+            ThinVec::new()
         } else {
             // Grab any replace ranges that occur *inside* the current AST node. Convert them
             // from `ParserRange` form to `NodeRange` form. We will perform the actual
@@ -429,13 +337,13 @@ impl<'a> Parser<'a> {
         //     - `attrs`: includes the outer and the inner attr.
         //     - `tokens`: lazy tokens for `g` (with its inner attr deleted).
 
-        let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
-            start_token: collect_pos.start_token,
-            cursor_snapshot: collect_pos.cursor_snapshot,
+        let tokens = LazyAttrTokenStream::new_pending(
+            collect_pos.start_token,
+            collect_pos.cursor_snapshot,
             num_calls,
-            break_last_token: self.break_last_token,
+            self.break_last_token,
             node_replacements,
-        });
+        );
         let mut tokens_used = false;
 
         // If in "definite capture mode" we need to register a replace range
@@ -483,71 +391,6 @@ impl<'a> Parser<'a> {
     }
 }
 
-/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
-/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
-/// close delims.
-fn make_attr_token_stream(
-    iter: impl Iterator<Item = FlatToken>,
-    break_last_token: u32,
-) -> AttrTokenStream {
-    #[derive(Debug)]
-    struct FrameData {
-        // This is `None` for the first frame, `Some` for all others.
-        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
-        inner: Vec<AttrTokenTree>,
-    }
-    // The stack always has at least one element. Storing it separately makes for shorter code.
-    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
-    let mut stack_rest = vec![];
-    for flat_token in iter {
-        match flat_token {
-            FlatToken::Token((token @ Token { kind, span }, spacing)) => {
-                if let Some(delim) = kind.open_delim() {
-                    stack_rest.push(mem::replace(
-                        &mut stack_top,
-                        FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
-                    ));
-                } else if let Some(delim) = kind.close_delim() {
-                    let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
-                    let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
-                    assert!(
-                        open_delim.eq_ignoring_invisible_origin(&delim),
-                        "Mismatched open/close delims: open={open_delim:?} close={span:?}"
-                    );
-                    let dspan = DelimSpan::from_pair(open_sp, span);
-                    let dspacing = DelimSpacing::new(open_spacing, spacing);
-                    let stream = AttrTokenStream::new(frame_data.inner);
-                    let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
-                    stack_top.inner.push(delimited);
-                } else {
-                    stack_top.inner.push(AttrTokenTree::Token(token, spacing))
-                }
-            }
-            FlatToken::AttrsTarget(target) => {
-                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
-            }
-            FlatToken::Empty => {}
-        }
-    }
-
-    if break_last_token > 0 {
-        let last_token = stack_top.inner.pop().unwrap();
-        if let AttrTokenTree::Token(last_token, spacing) = last_token {
-            let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();
-
-            // Tokens are always ASCII chars, so we can use byte arithmetic here.
-            let mut first_span = last_token.span.shrink_to_lo();
-            first_span =
-                first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));
-
-            stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
-        } else {
-            panic!("Unexpected last token {last_token:?}")
-        }
-    }
-    AttrTokenStream::new(stack_top.inner)
-}
-
 /// Tokens are needed if:
 /// - any non-single-segment attributes (other than doc comments) are present,
 ///   e.g. `rustfmt::skip`; or
@@ -562,14 +405,3 @@ fn needs_tokens(attrs: &[ast::Attribute]) -> bool {
         }
     })
 }
-
-// Some types are used a lot. Make sure they don't unintentionally get bigger.
-#[cfg(target_pointer_width = "64")]
-mod size_asserts {
-    use rustc_data_structures::static_assert_size;
-
-    use super::*;
-    // tidy-alphabetical-start
-    static_assert_size!(LazyAttrTokenStreamImpl, 96);
-    // tidy-alphabetical-end
-}
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
diff --git a/compiler/rustc_resolve/src/lib.rs b/compiler/rustc_resolve/src/lib.rs

Original file line number	Diff line number	Diff line change
`@@ -836,7 +836,7 @@ fn visit_lazy_tts_opt_mut<T: MutVisitor>(vis: &mut T, lazy_tts: Option<&mut Lazy`
`836`	`836`	`if let Some(lazy_tts) = lazy_tts {`
`837`	`837`	`let mut tts = lazy_tts.to_attr_token_stream();`
`838`	`838`	`visit_attr_tts(vis, &mut tts);`
`839`		`- *lazy_tts = LazyAttrTokenStream::new(tts);`
	`839`	`+ *lazy_tts = LazyAttrTokenStream::new_direct(tts);`
`840`	`840`	`}`
`841`	`841`	`}`
`842`	`842`	`}`