Skip to content

Commit 2a31958

Browse files
committed
Auto merge of rust-lang#115747 - Zoxc:query-hashes, r=<try>
Optimize hash map operations in the query system This optimizes hash map operations in the query system by explicitly passing hashes and using more optimal operations. `find_or_find_insert_slot` in particular saves a hash table lookup over `entry`. It's not yet available in a safe API, but will be in rust-lang/hashbrown#466. <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></th><td colspan="2"><b>After</b></th></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</th></tr><tr><td>🟣 <b>clap</b>:check</td><td align="right">1.6189s</td><td align="right">1.6129s</td><td align="right"> -0.37%</td></tr><tr><td>🟣 <b>hyper</b>:check</td><td align="right">0.2353s</td><td align="right">0.2337s</td><td align="right"> -0.67%</td></tr><tr><td>🟣 <b>regex</b>:check</td><td align="right">0.9344s</td><td align="right">0.9289s</td><td align="right"> -0.59%</td></tr><tr><td>🟣 <b>syn</b>:check</td><td align="right">1.4693s</td><td align="right">1.4652s</td><td align="right"> -0.28%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check</td><td align="right">5.6606s</td><td align="right">5.6439s</td><td align="right"> -0.30%</td></tr><tr><td>Total</td><td align="right">9.9185s</td><td align="right">9.8846s</td><td align="right"> -0.34%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">0.9956s</td><td align="right"> -0.44%</td></tr></table> r? `@cjgillot`
2 parents 8c04c06 + 54d9510 commit 2a31958

File tree

10 files changed

+87
-43
lines changed

10 files changed

+87
-43
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -4350,6 +4350,7 @@ name = "rustc_query_system"
43504350
version = "0.0.0"
43514351
dependencies = [
43524352
"parking_lot 0.12.1",
4353+
"rustc-hash",
43534354
"rustc-rayon-core",
43544355
"rustc_ast",
43554356
"rustc_data_structures",

compiler/rustc_data_structures/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ extern crate cfg_if;
4747
#[macro_use]
4848
extern crate rustc_macros;
4949

50+
#[cfg(parallel_compiler)]
51+
extern crate hashbrown;
52+
5053
use std::fmt;
5154

5255
pub use rustc_index::static_assert_size;

compiler/rustc_data_structures/src/marker.rs

+1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ cfg_if!(
8787
[std::sync::mpsc::Sender<T> where T: DynSend]
8888
[std::sync::Arc<T> where T: ?Sized + DynSync + DynSend]
8989
[std::sync::LazyLock<T, F> where T: DynSend, F: DynSend]
90+
[hashbrown::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
9091
[std::collections::HashSet<K, S> where K: DynSend, S: DynSend]
9192
[std::collections::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
9293
[std::collections::BTreeMap<K, V, A> where K: DynSend, V: DynSend, A: std::alloc::Allocator + Clone + DynSend]

compiler/rustc_middle/src/query/plumbing.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,8 @@ macro_rules! define_feedable {
524524
&value,
525525
hash_result!([$($modifiers)*]),
526526
);
527-
cache.complete(key, erased, dep_node_index);
527+
let key_hash = rustc_data_structures::sharded::make_hash(&key);
528+
cache.complete(key, key_hash, erased, dep_node_index);
528529
}
529530
}
530531
}

compiler/rustc_query_system/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77

88
[dependencies]
99
parking_lot = "0.12"
10+
rustc-hash = "1.1.0"
1011
rustc_ast = { path = "../rustc_ast" }
1112
rustc_data_structures = { path = "../rustc_data_structures" }
1213
rustc_errors = { path = "../rustc_errors" }

compiler/rustc_query_system/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ extern crate rustc_data_structures;
1616
#[macro_use]
1717
extern crate rustc_macros;
1818

19+
#[allow(unused_extern_crates)]
20+
extern crate hashbrown;
21+
1922
use rustc_errors::{DiagnosticMessage, SubdiagnosticMessage};
2023
use rustc_fluent_macro::fluent_messages;
2124

compiler/rustc_query_system/src/query/caches.rs

+16-16
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
use crate::dep_graph::DepNodeIndex;
2-
3-
use rustc_data_structures::fx::FxHashMap;
42
use rustc_data_structures::sharded::{self, Sharded};
53
use rustc_data_structures::sync::OnceLock;
4+
use rustc_hash::FxHasher;
65
use rustc_index::{Idx, IndexVec};
76
use std::fmt::Debug;
7+
use std::hash::BuildHasherDefault;
88
use std::hash::Hash;
99
use std::marker::PhantomData;
1010

@@ -19,9 +19,9 @@ pub trait QueryCache: Sized {
1919
type Value: Copy;
2020

2121
/// Checks if the query is already computed and in the cache.
22-
fn lookup(&self, key: &Self::Key) -> Option<(Self::Value, DepNodeIndex)>;
22+
fn lookup(&self, key: &Self::Key, key_hash: u64) -> Option<(Self::Value, DepNodeIndex)>;
2323

24-
fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex);
24+
fn complete(&self, key: Self::Key, key_hash: u64, value: Self::Value, index: DepNodeIndex);
2525

2626
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
2727
}
@@ -35,7 +35,7 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<'tcx, V> for DefaultCacheSelecto
3535
}
3636

3737
pub struct DefaultCache<K, V> {
38-
cache: Sharded<FxHashMap<K, (V, DepNodeIndex)>>,
38+
cache: Sharded<hashbrown::HashMap<K, (V, DepNodeIndex), BuildHasherDefault<FxHasher>>>,
3939
}
4040

4141
impl<K, V> Default for DefaultCache<K, V> {
@@ -53,20 +53,20 @@ where
5353
type Value = V;
5454

5555
#[inline(always)]
56-
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
57-
let key_hash = sharded::make_hash(key);
56+
fn lookup(&self, key: &K, key_hash: u64) -> Option<(V, DepNodeIndex)> {
5857
let lock = self.cache.lock_shard_by_hash(key_hash);
5958
let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
6059

6160
if let Some((_, value)) = result { Some(*value) } else { None }
6261
}
6362

6463
#[inline]
65-
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
66-
let mut lock = self.cache.lock_shard_by_value(&key);
67-
// We may be overwriting another value. This is all right, since the dep-graph
68-
// will check that the fingerprint matches.
69-
lock.insert(key, (value, index));
64+
fn complete(&self, key: K, key_hash: u64, value: V, index: DepNodeIndex) {
65+
let mut lock = self.cache.lock_shard_by_hash(key_hash);
66+
// Use the raw table API since we know that the
67+
// key does not exist and we already have the hash.
68+
lock.raw_table_mut()
69+
.insert(key_hash, (key, (value, index)), |(k, _)| sharded::make_hash(k));
7070
}
7171

7272
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
@@ -104,12 +104,12 @@ where
104104
type Value = V;
105105

106106
#[inline(always)]
107-
fn lookup(&self, _key: &()) -> Option<(V, DepNodeIndex)> {
107+
fn lookup(&self, _key: &(), _key_hash: u64) -> Option<(V, DepNodeIndex)> {
108108
self.cache.get().copied()
109109
}
110110

111111
#[inline]
112-
fn complete(&self, _key: (), value: V, index: DepNodeIndex) {
112+
fn complete(&self, _key: (), _key_hash: u64, value: V, index: DepNodeIndex) {
113113
self.cache.set((value, index)).ok();
114114
}
115115

@@ -147,13 +147,13 @@ where
147147
type Value = V;
148148

149149
#[inline(always)]
150-
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
150+
fn lookup(&self, key: &K, _key_hash: u64) -> Option<(V, DepNodeIndex)> {
151151
let lock = self.cache.lock_shard_by_hash(key.index() as u64);
152152
if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
153153
}
154154

155155
#[inline]
156-
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
156+
fn complete(&self, key: K, _key_hash: u64, value: V, index: DepNodeIndex) {
157157
let mut lock = self.cache.lock_shard_by_hash(key.index() as u64);
158158
lock.insert(key, (value, index));
159159
}

compiler/rustc_query_system/src/query/plumbing.rs

+58-24
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,27 @@ use crate::query::job::{report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobI
1212
use crate::query::SerializedDepNodeIndex;
1313
use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame};
1414
use crate::HandleCycleError;
15+
use hashbrown::hash_map::RawEntryMut;
1516
use rustc_data_structures::fingerprint::Fingerprint;
16-
use rustc_data_structures::fx::FxHashMap;
17-
use rustc_data_structures::sharded::Sharded;
17+
use rustc_data_structures::sharded::{self, Sharded};
1818
use rustc_data_structures::stack::ensure_sufficient_stack;
1919
use rustc_data_structures::sync::Lock;
2020
#[cfg(parallel_compiler)]
2121
use rustc_data_structures::{cold_path, sync};
2222
use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed, FatalError};
23+
use rustc_hash::FxHasher;
2324
use rustc_span::{Span, DUMMY_SP};
2425
use std::cell::Cell;
25-
use std::collections::hash_map::Entry;
2626
use std::fmt::Debug;
27+
use std::hash::BuildHasherDefault;
2728
use std::hash::Hash;
2829
use std::mem;
2930
use thin_vec::ThinVec;
3031

3132
use super::QueryConfig;
3233

3334
pub struct QueryState<K> {
34-
active: Sharded<FxHashMap<K, QueryResult>>,
35+
active: Sharded<hashbrown::HashMap<K, QueryResult, BuildHasherDefault<FxHasher>>>,
3536
}
3637

3738
/// Indicates the state of a query for a given key in a query map.
@@ -142,7 +143,7 @@ where
142143
{
143144
/// Completes the query by updating the query cache with the `result`,
144145
/// signals the waiter and forgets the JobOwner, so it won't poison the query
145-
fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex)
146+
fn complete<C>(self, cache: &C, key_hash: u64, result: C::Value, dep_node_index: DepNodeIndex)
146147
where
147148
C: QueryCache<Key = K>,
148149
{
@@ -154,13 +155,17 @@ where
154155

155156
// Mark as complete before we remove the job from the active state
156157
// so no other thread can re-execute this query.
157-
cache.complete(key, result, dep_node_index);
158+
cache.complete(key, key_hash, result, dep_node_index);
158159

159160
let job = {
160-
let mut lock = state.active.lock_shard_by_value(&key);
161-
match lock.remove(&key).unwrap() {
162-
QueryResult::Started(job) => job,
163-
QueryResult::Poisoned => panic!(),
161+
let mut lock = state.active.lock_shard_by_hash(key_hash);
162+
163+
match lock.raw_entry_mut().from_key_hashed_nocheck(key_hash, &key) {
164+
RawEntryMut::Vacant(_) => panic!(),
165+
RawEntryMut::Occupied(occupied) => match occupied.remove() {
166+
QueryResult::Started(job) => job,
167+
QueryResult::Poisoned => panic!(),
168+
},
164169
}
165170
};
166171

@@ -209,7 +214,8 @@ where
209214
C: QueryCache,
210215
Tcx: DepContext,
211216
{
212-
match cache.lookup(&key) {
217+
let key_hash = sharded::make_hash(key);
218+
match cache.lookup(&key, key_hash) {
213219
Some((value, index)) => {
214220
tcx.profiler().query_cache_hit(index.into());
215221
tcx.dep_graph().read_index(index);
@@ -246,6 +252,7 @@ fn wait_for_query<Q, Qcx>(
246252
qcx: Qcx,
247253
span: Span,
248254
key: Q::Key,
255+
key_hash: u64,
249256
latch: QueryLatch,
250257
current: Option<QueryJobId>,
251258
) -> (Q::Value, Option<DepNodeIndex>)
@@ -264,7 +271,7 @@ where
264271

265272
match result {
266273
Ok(()) => {
267-
let Some((v, index)) = query.query_cache(qcx).lookup(&key) else {
274+
let Some((v, index)) = query.query_cache(qcx).lookup(&key, key_hash) else {
268275
cold_path(|| {
269276
// We didn't find the query result in the query cache. Check if it was
270277
// poisoned due to a panic instead.
@@ -301,7 +308,8 @@ where
301308
Qcx: QueryContext,
302309
{
303310
let state = query.query_state(qcx);
304-
let mut state_lock = state.active.lock_shard_by_value(&key);
311+
let key_hash = sharded::make_hash(&key);
312+
let mut state_lock = state.active.lock_shard_by_hash(key_hash);
305313

306314
// For the parallel compiler we need to check both the query cache and query state structures
307315
// while holding the state lock to ensure that 1) the query has not yet completed and 2) the
@@ -310,29 +318,46 @@ where
310318
// executing, but another thread may have already completed the query and stores it result
311319
// in the query cache.
312320
if cfg!(parallel_compiler) && qcx.dep_context().sess().threads() > 1 {
313-
if let Some((value, index)) = query.query_cache(qcx).lookup(&key) {
321+
if let Some((value, index)) = query.query_cache(qcx).lookup(&key, key_hash) {
314322
qcx.dep_context().profiler().query_cache_hit(index.into());
315323
return (value, Some(index));
316324
}
317325
}
318326

319327
let current_job_id = qcx.current_query_job();
320328

321-
match state_lock.entry(key) {
322-
Entry::Vacant(entry) => {
329+
match state_lock.raw_table_mut().find_or_find_insert_slot(
330+
key_hash,
331+
|(k, _)| *k == key,
332+
|(k, _)| sharded::make_hash(k),
333+
) {
334+
Err(free_slot) => {
323335
// Nothing has computed or is computing the query, so we start a new job and insert it in the
324336
// state map.
325337
let id = qcx.next_job_id();
326338
let job = QueryJob::new(id, span, current_job_id);
327-
entry.insert(QueryResult::Started(job));
339+
340+
// SAFETY: The slot is still valid as there's
341+
// been no mutation to the table since we hold the lock.
342+
unsafe {
343+
state_lock.raw_table_mut().insert_in_slot(
344+
key_hash,
345+
free_slot,
346+
(key, QueryResult::Started(job)),
347+
);
348+
}
328349

329350
// Drop the lock before we start executing the query
330351
drop(state_lock);
331352

332-
execute_job::<_, _, INCR>(query, qcx, state, key, id, dep_node)
353+
execute_job::<_, _, INCR>(query, qcx, state, key, key_hash, id, dep_node)
333354
}
334-
Entry::Occupied(mut entry) => {
335-
match entry.get_mut() {
355+
Ok(bucket) => {
356+
// SAFETY: We know this bucket is still valid
357+
// since we just got it from `find_or_find_insert_slot`.
358+
let entry = unsafe { &mut bucket.as_mut().1 };
359+
360+
match entry {
336361
QueryResult::Started(job) => {
337362
#[cfg(parallel_compiler)]
338363
if sync::is_dyn_thread_safe() {
@@ -342,7 +367,15 @@ where
342367

343368
// Only call `wait_for_query` if we're using a Rayon thread pool
344369
// as it will attempt to mark the worker thread as blocked.
345-
return wait_for_query(query, qcx, span, key, latch, current_job_id);
370+
return wait_for_query(
371+
query,
372+
qcx,
373+
span,
374+
key,
375+
key_hash,
376+
latch,
377+
current_job_id,
378+
);
346379
}
347380

348381
let id = job.id;
@@ -364,6 +397,7 @@ fn execute_job<Q, Qcx, const INCR: bool>(
364397
qcx: Qcx,
365398
state: &QueryState<Q::Key>,
366399
key: Q::Key,
400+
key_hash: u64,
367401
id: QueryJobId,
368402
dep_node: Option<DepNode>,
369403
) -> (Q::Value, Option<DepNodeIndex>)
@@ -395,7 +429,7 @@ where
395429
// This can't happen, as query feeding adds the very dependencies to the fed query
396430
// as its feeding query had. So if the fed query is red, so is its feeder, which will
397431
// get evaluated first, and re-feed the query.
398-
if let Some((cached_result, _)) = cache.lookup(&key) {
432+
if let Some((cached_result, _)) = cache.lookup(&key, key_hash) {
399433
let Some(hasher) = query.hash_result() else {
400434
panic!(
401435
"no_hash fed query later has its value computed.\n\
@@ -427,7 +461,7 @@ where
427461
}
428462
}
429463
}
430-
job_owner.complete(cache, result, dep_node_index);
464+
job_owner.complete(cache, key_hash, result, dep_node_index);
431465

432466
(result, Some(dep_node_index))
433467
}
@@ -826,7 +860,7 @@ where
826860
{
827861
// We may be concurrently trying both execute and force a query.
828862
// Ensure that only one of them runs the query.
829-
if let Some((_, index)) = query.query_cache(qcx).lookup(&key) {
863+
if let Some((_, index)) = query.query_cache(qcx).lookup(&key, sharded::make_hash(&key)) {
830864
qcx.dep_context().profiler().query_cache_hit(index.into());
831865
return;
832866
}

library/std/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ libc = { version = "0.2.148", default-features = false, features = ['rustc-dep-o
2121
compiler_builtins = { version = "0.1.100" }
2222
profiler_builtins = { path = "../profiler_builtins", optional = true }
2323
unwind = { path = "../unwind" }
24-
hashbrown = { version = "0.14", default-features = false, features = ['rustc-dep-of-std'] }
24+
hashbrown = { version = "0.14", default-features = false, features = ['rustc-dep-of-std', 'raw'] }
2525
std_detect = { path = "../stdarch/crates/std_detect", default-features = false, features = ['rustc-dep-of-std'] }
2626

2727
# Dependencies of the `backtrace` crate

tests/ui/issues/issue-21763.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ LL | foo::<HashMap<Rc<()>, Rc<()>>>();
66
|
77
= help: within `(Rc<()>, Rc<()>)`, the trait `Send` is not implemented for `Rc<()>`
88
= note: required because it appears within the type `(Rc<()>, Rc<()>)`
9-
= note: required for `hashbrown::raw::RawTable<(Rc<()>, Rc<()>)>` to implement `Send`
9+
= note: required for `hashbrown::raw::inner::RawTable<(Rc<()>, Rc<()>)>` to implement `Send`
1010
note: required because it appears within the type `HashMap<Rc<()>, Rc<()>, RandomState>`
1111
--> $HASHBROWN_SRC_LOCATION
1212
note: required because it appears within the type `HashMap<Rc<()>, Rc<()>>`

0 commit comments

Comments
 (0)