Skip to content

Commit cbed977

Browse files
authored
Rollup merge of #35539 - cgswords:ts_concat, r=nrc
Implemented a smarter TokenStream concatenation system. The new algorithm performs 'aggressive compacting' during concatenation as follows: - If the nodes' combined total length is less than 32, we copy both of them into a new vector and build a new leaf node. - If one node is an internal node and the other is a 'small' leaf (length<32), we recur down the internal node on the appropriate side. - Otherwise, we construct a new internal node that points to them as left and right. This should produce notably better behavior than the current concatenation implementation.
2 parents 3a86773 + 16cc8a7 commit cbed977

File tree

1 file changed

+92
-20
lines changed

1 file changed

+92
-20
lines changed

src/libsyntax/tokenstream.rs

+92-20
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,11 @@ pub struct TokenStream {
340340
ts: InternalTS,
341341
}
342342

343+
// This indicates the maximum size for a leaf in the concatenation algorithm.
344+
// If two leaves are collectively no larger than this, they will be merged.
345+
// If a leaf is larger than this, it will be concatenated at the top.
346+
const LEAF_SIZE : usize = 32;
347+
343348
// NB If Leaf access proves to be slow, introducing a secondary Leaf without the bounds
344349
// for unsliced Leafs may lead to some performance improvement.
345350
#[derive(Clone, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)]
@@ -483,6 +488,37 @@ impl InternalTS {
483488
}
484489
}
485490
}
491+
492+
fn to_vec(&self) -> Vec<&TokenTree> {
493+
let mut res = Vec::with_capacity(self.len());
494+
fn traverse_and_append<'a>(res: &mut Vec<&'a TokenTree>, ts: &'a InternalTS) {
495+
match *ts {
496+
InternalTS::Empty(..) => {},
497+
InternalTS::Leaf { ref tts, offset, len, .. } => {
498+
let mut to_app = tts[offset..offset + len].iter().collect();
499+
res.append(&mut to_app);
500+
}
501+
InternalTS::Node { ref left, ref right, .. } => {
502+
traverse_and_append(res, left);
503+
traverse_and_append(res, right);
504+
}
505+
}
506+
}
507+
traverse_and_append(&mut res, self);
508+
res
509+
}
510+
511+
/// Builds an owned vector by cloning each `TokenTree` yielded by `to_vec`.
fn to_tts(&self) -> Vec<TokenTree> {
    let borrowed = self.to_vec();
    borrowed.into_iter().cloned().collect()
}
514+
515+
// Returns an internal node's children.
516+
fn children(&self) -> Option<(Rc<InternalTS>, Rc<InternalTS>)> {
517+
match *self {
518+
InternalTS::Node { ref left, ref right, .. } => Some((left.clone(), right.clone())),
519+
_ => None,
520+
}
521+
}
486522
}
487523

488524
/// TokenStream operators include basic destructuring, boolean operations, `maybe_...`
@@ -496,14 +532,17 @@ impl InternalTS {
496532
///
497533
/// `maybe_path_prefix("a::b::c(a,b,c).foo()") -> (a::b::c, "(a,b,c).foo()")`
498534
impl TokenStream {
535+
// Construct an empty node with a dummy span.
499536
pub fn mk_empty() -> TokenStream {
500537
TokenStream { ts: InternalTS::Empty(DUMMY_SP) }
501538
}
502539

540+
// Construct an empty node with the provided span.
503541
fn mk_spanned_empty(sp: Span) -> TokenStream {
504542
TokenStream { ts: InternalTS::Empty(sp) }
505543
}
506544

545+
// Construct a leaf node with a 0 offset and length equivalent to the input.
507546
fn mk_leaf(tts: Rc<Vec<TokenTree>>, sp: Span) -> TokenStream {
508547
let len = tts.len();
509548
TokenStream {
@@ -516,6 +555,7 @@ impl TokenStream {
516555
}
517556
}
518557

558+
// Construct a leaf node with the provided values.
519559
fn mk_sub_leaf(tts: Rc<Vec<TokenTree>>, offset: usize, len: usize, sp: Span) -> TokenStream {
520560
TokenStream {
521561
ts: InternalTS::Leaf {
@@ -527,6 +567,7 @@ impl TokenStream {
527567
}
528568
}
529569

570+
// Construct an internal node with the provided values.
530571
fn mk_int_node(left: Rc<InternalTS>,
531572
right: Rc<InternalTS>,
532573
len: usize,
@@ -567,11 +608,56 @@ impl TokenStream {
567608
}
568609
}
569610

570-
/// Concatenates two TokenStreams into a new TokenStream
611+
/// Concatenates two TokenStreams into a new TokenStream.
571612
pub fn concat(left: TokenStream, right: TokenStream) -> TokenStream {
572-
let new_len = left.len() + right.len();
573-
let new_span = combine_spans(left.span(), right.span());
574-
TokenStream::mk_int_node(Rc::new(left.ts), Rc::new(right.ts), new_len, new_span)
613+
// This internal procedure performs 'aggressive compacting' during concatenation as
614+
// follows:
615+
// - If the nodes' combined total total length is less than 32, we copy both of
616+
// them into a new vector and build a new leaf node.
617+
// - If one node is an internal node and the other is a 'small' leaf (length<32),
618+
// we recur down the internal node on the appropriate side.
619+
// - Otherwise, we construct a new internal node that points to them as left and
620+
// right.
621+
fn concat_internal(left: Rc<InternalTS>, right: Rc<InternalTS>) -> TokenStream {
622+
let llen = left.len();
623+
let rlen = right.len();
624+
let len = llen + rlen;
625+
let span = combine_spans(left.span(), right.span());
626+
if len <= LEAF_SIZE {
627+
let mut new_vec = left.to_tts();
628+
let mut rvec = right.to_tts();
629+
new_vec.append(&mut rvec);
630+
return TokenStream::mk_leaf(Rc::new(new_vec), span);
631+
}
632+
633+
match (left.children(), right.children()) {
634+
(Some((lleft, lright)), None) => {
635+
if rlen <= LEAF_SIZE {
636+
let new_right = concat_internal(lright, right);
637+
TokenStream::mk_int_node(lleft, Rc::new(new_right.ts), len, span)
638+
} else {
639+
TokenStream::mk_int_node(left, right, len, span)
640+
}
641+
}
642+
(None, Some((rleft, rright))) => {
643+
if rlen <= LEAF_SIZE {
644+
let new_left = concat_internal(left, rleft);
645+
TokenStream::mk_int_node(Rc::new(new_left.ts), rright, len, span)
646+
} else {
647+
TokenStream::mk_int_node(left, right, len, span)
648+
}
649+
}
650+
(_, _) => TokenStream::mk_int_node(left, right, len, span),
651+
}
652+
}
653+
654+
if left.is_empty() {
655+
right
656+
} else if right.is_empty() {
657+
left
658+
} else {
659+
concat_internal(Rc::new(left.ts), Rc::new(right.ts))
660+
}
575661
}
576662

577663
/// Indicate if the TokenStream is empty.
@@ -586,27 +672,13 @@ impl TokenStream {
586672

587673
/// Convert a TokenStream into a vector of borrowed TokenTrees.
588674
pub fn to_vec(&self) -> Vec<&TokenTree> {
589-
fn internal_to_vec(ts: &InternalTS) -> Vec<&TokenTree> {
590-
match *ts {
591-
InternalTS::Empty(..) => Vec::new(),
592-
InternalTS::Leaf { ref tts, offset, len, .. } => {
593-
tts[offset..offset + len].iter().collect()
594-
}
595-
InternalTS::Node { ref left, ref right, .. } => {
596-
let mut v1 = internal_to_vec(left);
597-
let mut v2 = internal_to_vec(right);
598-
v1.append(&mut v2);
599-
v1
600-
}
601-
}
602-
}
603-
internal_to_vec(&self.ts)
675+
self.ts.to_vec()
604676
}
605677

606678
/// Convert a TokenStream into a vector of TokenTrees (by cloning the TokenTrees).
607679
/// (This operation is an O(n) deep copy of the underlying structure.)
608680
pub fn to_tts(&self) -> Vec<TokenTree> {
609-
self.to_vec().into_iter().cloned().collect::<Vec<TokenTree>>()
681+
self.ts.to_tts()
610682
}
611683

612684
/// Return the TokenStream's span.

0 commit comments

Comments
 (0)