Skip to content

Commit 4304f75

Browse files
committed
Use a sharded symbol interner
This should reduce contention on the symbol interner when it is accessed from multiple threads under parallel rustc.
1 parent 6cdd42f commit 4304f75

File tree

6 files changed

+97
-41
lines changed

6 files changed

+97
-41
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -4036,6 +4036,7 @@ version = "0.1.0"
40364036
dependencies = [
40374037
"proc-macro2",
40384038
"quote",
4039+
"rustc-hash",
40394040
"syn",
40404041
"synstructure",
40414042
]

compiler/rustc_macros/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ proc-macro = true
1010
synstructure = "0.12.1"
1111
syn = { version = "1", features = ["full"] }
1212
proc-macro2 = "1"
13+
rustc-hash = "1.1.0"
1314
quote = "1"

compiler/rustc_macros/src/symbols.rs

+52-21
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323
//! ```
2424
2525
use proc_macro2::{Span, TokenStream};
26-
use quote::quote;
26+
use quote::{quote, ToTokens};
2727
use std::collections::HashMap;
28+
use std::hash::{Hash, Hasher};
2829
use syn::parse::{Parse, ParseStream, Result};
2930
use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token};
3031

@@ -100,6 +101,46 @@ impl Errors {
100101
}
101102
}
102103

104+
struct PrefillStream {
105+
prefill: [Vec<String>; 256],
106+
}
107+
108+
impl PrefillStream {
109+
fn new() -> Self {
110+
const EMPTY_VEC: Vec<String> = Vec::new();
111+
PrefillStream { prefill: [EMPTY_VEC; 256] }
112+
}
113+
114+
fn add_symbol(&mut self, symbol: String) -> u32 {
115+
// Warning: hasher has to be kept in sync with rustc_span::symbol to ensure that all
116+
// pre-filled symbols are assigned the correct shard.
117+
let mut state = rustc_hash::FxHasher::default();
118+
symbol.hash(&mut state);
119+
let hash = state.finish();
120+
121+
let shard = (hash & 0xff) as usize;
122+
let index = self.prefill[shard].len();
123+
124+
self.prefill[shard].push(symbol);
125+
126+
(index << 8) as u32 | shard as u32
127+
}
128+
}
129+
130+
impl ToTokens for PrefillStream {
131+
fn to_tokens(&self, tokens: &mut TokenStream) {
132+
for shard in &self.prefill {
133+
let mut shard_stream = quote! {};
134+
for symbol in shard {
135+
shard_stream.extend(quote! { #symbol, });
136+
}
137+
tokens.extend(quote! {
138+
&[#shard_stream],
139+
});
140+
}
141+
}
142+
}
143+
103144
pub fn symbols(input: TokenStream) -> TokenStream {
104145
let (mut output, errors) = symbols_with_errors(input);
105146

@@ -126,8 +167,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
126167

127168
let mut keyword_stream = quote! {};
128169
let mut symbols_stream = quote! {};
129-
let mut prefill_stream = quote! {};
130-
let mut counter = 0u32;
170+
let mut digit_stream = quote! {};
171+
let mut prefill_stream = PrefillStream::new();
131172
let mut keys =
132173
HashMap::<String, Span>::with_capacity(input.keywords.len() + input.symbols.len() + 10);
133174
let mut prev_key: Option<(Span, String)> = None;
@@ -157,13 +198,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
157198
let value = &keyword.value;
158199
let value_string = value.value();
159200
check_dup(keyword.name.span(), &value_string, &mut errors);
160-
prefill_stream.extend(quote! {
161-
#value,
162-
});
201+
let sym = prefill_stream.add_symbol(value_string);
163202
keyword_stream.extend(quote! {
164-
pub const #name: Symbol = Symbol::new(#counter);
203+
pub const #name: Symbol = Symbol::new(#sym);
165204
});
166-
counter += 1;
167205
}
168206

169207
// Generate the listed symbols.
@@ -176,30 +214,21 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
176214
check_dup(symbol.name.span(), &value, &mut errors);
177215
check_order(symbol.name.span(), &name.to_string(), &mut errors);
178216

179-
prefill_stream.extend(quote! {
180-
#value,
181-
});
217+
let sym = prefill_stream.add_symbol(value);
182218
symbols_stream.extend(quote! {
183-
pub const #name: Symbol = Symbol::new(#counter);
219+
pub const #name: Symbol = Symbol::new(#sym);
184220
});
185-
counter += 1;
186221
}
187222

188223
// Generate symbols for the strings "0", "1", ..., "9".
189-
let digits_base = counter;
190-
counter += 10;
191224
for n in 0..10 {
192225
let n = n.to_string();
193226
check_dup(Span::call_site(), &n, &mut errors);
194-
prefill_stream.extend(quote! {
195-
#n,
196-
});
227+
let sym = prefill_stream.add_symbol(n);
228+
digit_stream.extend(quote! { Symbol::new(#sym), });
197229
}
198-
let _ = counter; // for future use
199230

200231
let output = quote! {
201-
const SYMBOL_DIGITS_BASE: u32 = #digits_base;
202-
203232
#[doc(hidden)]
204233
#[allow(non_upper_case_globals)]
205234
mod kw_generated {
@@ -214,6 +243,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
214243
#symbols_stream
215244
}
216245

246+
const SYMBOL_DIGITS: [Symbol; 10] = [#digit_stream];
247+
217248
impl Interner {
218249
pub(crate) fn fresh() -> Self {
219250
Interner::prefill(&[

compiler/rustc_span/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
1717
#![feature(array_windows)]
1818
#![feature(crate_visibility_modifier)]
19+
#![feature(hash_raw_entry)]
1920
#![feature(if_let_guard)]
2021
#![feature(negative_impls)]
2122
#![feature(nll)]

compiler/rustc_span/src/symbol.rs

+41-19
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
//! type, and vice versa.
44
55
use rustc_arena::DroplessArena;
6-
use rustc_data_structures::fx::FxHashMap;
6+
use rustc_data_structures::fx::{FxHashMap, FxHasher};
77
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
88
use rustc_data_structures::sync::Lock;
99
use rustc_macros::HashStable_Generic;
1010
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
1111

1212
use std::cmp::{Ord, PartialEq, PartialOrd};
13+
use std::collections::hash_map::RawEntryMut;
1314
use std::fmt;
1415
use std::hash::{Hash, Hasher};
1516
use std::str;
@@ -1698,8 +1699,7 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
16981699
}
16991700
}
17001701

1701-
#[derive(Default)]
1702-
pub(crate) struct Interner(Lock<InternerInner>);
1702+
pub(crate) struct Interner([Lock<InternerShard>; 256]);
17031703

17041704
// The `&'static str`s in this type actually point into the arena.
17051705
//
@@ -1710,46 +1710,68 @@ pub(crate) struct Interner(Lock<InternerInner>);
17101710
// This type is private to prevent accidentally constructing more than one `Interner` on the same
17111711
// thread, which makes it easy to mixup `Symbol`s between `Interner`s.
17121712
#[derive(Default)]
1713-
struct InternerInner {
1713+
struct InternerShard {
17141714
arena: DroplessArena,
17151715
names: FxHashMap<&'static str, Symbol>,
17161716
strings: Vec<&'static str>,
17171717
}
17181718

17191719
impl Interner {
1720-
fn prefill(init: &[&'static str]) -> Self {
1721-
Interner(Lock::new(InternerInner {
1722-
strings: init.into(),
1723-
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
1724-
..Default::default()
1720+
#[cfg(test)]
1721+
fn empty_for_test() -> Self {
1722+
Self([(); 256].map(|()| Lock::new(InternerShard::default())))
1723+
}
1724+
1725+
fn prefill(init: &[&[&'static str]; 256]) -> Self {
1726+
let mut i = 0;
1727+
Interner(init.map(|init| {
1728+
let shard = Lock::new(InternerShard {
1729+
strings: init.into(),
1730+
names: init
1731+
.iter()
1732+
.copied()
1733+
.zip((0..).map(|idx| Symbol::new(idx << 8 | i)))
1734+
.collect(),
1735+
..Default::default()
1736+
});
1737+
i += 1;
1738+
shard
17251739
}))
17261740
}
17271741

17281742
#[inline]
17291743
fn intern(&self, string: &str) -> Symbol {
1730-
let mut inner = self.0.lock();
1731-
if let Some(&name) = inner.names.get(string) {
1732-
return name;
1733-
}
1744+
// Warning: hasher has to be kept in sync with rustc_macros::symbols to ensure that all
1745+
// pre-filled symbols are assigned the correct shard.
1746+
let mut state = FxHasher::default();
1747+
string.hash(&mut state);
1748+
let hash = state.finish();
1749+
1750+
let mut shard = self.0[(hash & 0xff) as usize].lock();
1751+
let shard = &mut *shard;
1752+
let vac = match shard.names.raw_entry_mut().from_key_hashed_nocheck(hash, string) {
1753+
RawEntryMut::Occupied(occ) => return *occ.get(),
1754+
RawEntryMut::Vacant(vac) => vac,
1755+
};
17341756

1735-
let name = Symbol::new(inner.strings.len() as u32);
1757+
let name = Symbol::new(((shard.strings.len() as u32) << 8) | (hash as u32 & 0xff));
17361758

17371759
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
17381760
// UTF-8.
17391761
let string: &str =
1740-
unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) };
1762+
unsafe { str::from_utf8_unchecked(shard.arena.alloc_slice(string.as_bytes())) };
17411763
// It is safe to extend the arena allocation to `'static` because we only access
17421764
// these while the arena is still alive.
17431765
let string: &'static str = unsafe { &*(string as *const str) };
1744-
inner.strings.push(string);
1745-
inner.names.insert(string, name);
1766+
shard.strings.push(string);
1767+
vac.insert(string, name);
17461768
name
17471769
}
17481770

17491771
// Get the symbol as a string. `Symbol::as_str()` should be used in
17501772
// preference to this function.
17511773
fn get(&self, symbol: Symbol) -> &str {
1752-
self.0.lock().strings[symbol.0.as_usize()]
1774+
self.0[symbol.0.as_usize() & 0xff].lock().strings[symbol.0.as_usize() >> 8]
17531775
}
17541776
}
17551777

@@ -1784,7 +1806,7 @@ pub mod sym {
17841806
pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol {
17851807
if let Result::Ok(idx) = n.try_into() {
17861808
if idx < 10 {
1787-
return Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32);
1809+
return super::SYMBOL_DIGITS[idx];
17881810
}
17891811
}
17901812
Symbol::intern(&n.to_string())

compiler/rustc_span/src/symbol/tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::create_default_session_globals_then;
44

55
#[test]
66
fn interner_tests() {
7-
let i = Interner::default();
7+
let i = Interner::empty_for_test();
88
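// first one is zero (NOTE(review): sharded symbols are `(index << 8) | shard`, so this holds only if "dog" hashes to shard 0 — confirm):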
99
assert_eq!(i.intern("dog"), Symbol::new(0));
1010
// re-use gets the same entry:

0 commit comments

Comments
 (0)