Skip to content

Commit d898dc6

Browse files
committed
Auto merge of #115747 - Zoxc:query-hashes, r=<try>
Optimize hash map operations in the query system This optimizes hash map operations in the query system by explicitly passing hashes and using more optimal operations. `find_or_find_insert_slot` in particular saves a hash table lookup over `entry`. It's not yet available in a safe API, but will be in rust-lang/hashbrown#466. <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></th><td colspan="2"><b>After</b></th></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</th></tr><tr><td>🟣 <b>clap</b>:check</td><td align="right">1.6189s</td><td align="right">1.6129s</td><td align="right"> -0.37%</td></tr><tr><td>🟣 <b>hyper</b>:check</td><td align="right">0.2353s</td><td align="right">0.2337s</td><td align="right"> -0.67%</td></tr><tr><td>🟣 <b>regex</b>:check</td><td align="right">0.9344s</td><td align="right">0.9289s</td><td align="right"> -0.59%</td></tr><tr><td>🟣 <b>syn</b>:check</td><td align="right">1.4693s</td><td align="right">1.4652s</td><td align="right"> -0.28%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check</td><td align="right">5.6606s</td><td align="right">5.6439s</td><td align="right"> -0.30%</td></tr><tr><td>Total</td><td align="right">9.9185s</td><td align="right">9.8846s</td><td align="right"> -0.34%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">0.9956s</td><td align="right"> -0.44%</td></tr></table> r? `@cjgillot`
2 parents 0fd7ce9 + 936d8ad commit d898dc6

File tree

7 files changed

+91
-41
lines changed

7 files changed

+91
-41
lines changed

Cargo.lock

+3
Original file line numberDiff line numberDiff line change
@@ -3600,6 +3600,7 @@ dependencies = [
36003600
"cfg-if",
36013601
"elsa",
36023602
"ena",
3603+
"hashbrown 0.14.0",
36033604
"indexmap 2.0.0",
36043605
"itertools",
36053606
"jobserver",
@@ -4328,7 +4329,9 @@ dependencies = [
43284329
name = "rustc_query_system"
43294330
version = "0.0.0"
43304331
dependencies = [
4332+
"hashbrown 0.14.0",
43314333
"parking_lot 0.12.1",
4334+
"rustc-hash",
43324335
"rustc-rayon-core",
43334336
"rustc_ast",
43344337
"rustc_data_structures",

compiler/rustc_data_structures/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ indexmap = { version = "2.0.0" }
1414
jobserver_crate = { version = "0.1.13", package = "jobserver" }
1515
libc = "0.2"
1616
measureme = "10.0.0"
17+
hashbrown = { version = "0.14", default-features = false, features = [
18+
"raw",
19+
"inline-more",
20+
"nightly",
21+
] }
1722
rustc-rayon-core = { version = "0.5.0", optional = true }
1823
rustc-rayon = { version = "0.5.0", optional = true }
1924
rustc_arena = { path = "../rustc_arena" }

compiler/rustc_data_structures/src/marker.rs

+1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ cfg_if!(
8787
[std::sync::mpsc::Sender<T> where T: DynSend]
8888
[std::sync::Arc<T> where T: ?Sized + DynSync + DynSend]
8989
[std::sync::LazyLock<T, F> where T: DynSend, F: DynSend]
90+
[hashbrown::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
9091
[std::collections::HashSet<K, S> where K: DynSend, S: DynSend]
9192
[std::collections::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
9293
[std::collections::BTreeMap<K, V, A> where K: DynSend, V: DynSend, A: std::alloc::Allocator + Clone + DynSend]

compiler/rustc_middle/src/query/plumbing.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,8 @@ macro_rules! define_feedable {
524524
&value,
525525
hash_result!([$($modifiers)*]),
526526
);
527-
cache.complete(key, erased, dep_node_index);
527+
let key_hash = rustc_data_structures::sharded::make_hash(&key);
528+
cache.complete(key, key_hash, erased, dep_node_index);
528529
}
529530
}
530531
}

compiler/rustc_query_system/Cargo.toml

+6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@ edition = "2021"
66
[lib]
77

88
[dependencies]
9+
hashbrown = { version = "0.14", default-features = false, features = [
10+
"raw",
11+
"inline-more",
12+
"nightly",
13+
] }
914
parking_lot = "0.12"
15+
rustc-hash = "1.1.0"
1016
rustc_ast = { path = "../rustc_ast" }
1117
rustc_data_structures = { path = "../rustc_data_structures" }
1218
rustc_errors = { path = "../rustc_errors" }

compiler/rustc_query_system/src/query/caches.rs

+16-16
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
use crate::dep_graph::DepNodeIndex;
2-
3-
use rustc_data_structures::fx::FxHashMap;
42
use rustc_data_structures::sharded::{self, Sharded};
53
use rustc_data_structures::sync::OnceLock;
4+
use rustc_hash::FxHasher;
65
use rustc_index::{Idx, IndexVec};
76
use std::fmt::Debug;
7+
use std::hash::BuildHasherDefault;
88
use std::hash::Hash;
99
use std::marker::PhantomData;
1010

@@ -19,9 +19,9 @@ pub trait QueryCache: Sized {
1919
type Value: Copy;
2020

2121
/// Checks if the query is already computed and in the cache.
22-
fn lookup(&self, key: &Self::Key) -> Option<(Self::Value, DepNodeIndex)>;
22+
fn lookup(&self, key: &Self::Key, key_hash: u64) -> Option<(Self::Value, DepNodeIndex)>;
2323

24-
fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex);
24+
fn complete(&self, key: Self::Key, key_hash: u64, value: Self::Value, index: DepNodeIndex);
2525

2626
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
2727
}
@@ -35,7 +35,7 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<'tcx, V> for DefaultCacheSelecto
3535
}
3636

3737
pub struct DefaultCache<K, V> {
38-
cache: Sharded<FxHashMap<K, (V, DepNodeIndex)>>,
38+
cache: Sharded<hashbrown::HashMap<K, (V, DepNodeIndex), BuildHasherDefault<FxHasher>>>,
3939
}
4040

4141
impl<K, V> Default for DefaultCache<K, V> {
@@ -53,20 +53,20 @@ where
5353
type Value = V;
5454

5555
#[inline(always)]
56-
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
57-
let key_hash = sharded::make_hash(key);
56+
fn lookup(&self, key: &K, key_hash: u64) -> Option<(V, DepNodeIndex)> {
5857
let lock = self.cache.lock_shard_by_hash(key_hash);
5958
let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
6059

6160
if let Some((_, value)) = result { Some(*value) } else { None }
6261
}
6362

6463
#[inline]
65-
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
66-
let mut lock = self.cache.lock_shard_by_value(&key);
67-
// We may be overwriting another value. This is all right, since the dep-graph
68-
// will check that the fingerprint matches.
69-
lock.insert(key, (value, index));
64+
fn complete(&self, key: K, key_hash: u64, value: V, index: DepNodeIndex) {
65+
let mut lock = self.cache.lock_shard_by_hash(key_hash);
66+
// Use the raw table API since we know that the
67+
// key does not exist and we already have the hash.
68+
lock.raw_table_mut()
69+
.insert(key_hash, (key, (value, index)), |(k, _)| sharded::make_hash(k));
7070
}
7171

7272
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
@@ -104,12 +104,12 @@ where
104104
type Value = V;
105105

106106
#[inline(always)]
107-
fn lookup(&self, _key: &()) -> Option<(V, DepNodeIndex)> {
107+
fn lookup(&self, _key: &(), _key_hash: u64) -> Option<(V, DepNodeIndex)> {
108108
self.cache.get().copied()
109109
}
110110

111111
#[inline]
112-
fn complete(&self, _key: (), value: V, index: DepNodeIndex) {
112+
fn complete(&self, _key: (), _key_hash: u64, value: V, index: DepNodeIndex) {
113113
self.cache.set((value, index)).ok();
114114
}
115115

@@ -147,13 +147,13 @@ where
147147
type Value = V;
148148

149149
#[inline(always)]
150-
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
150+
fn lookup(&self, key: &K, _key_hash: u64) -> Option<(V, DepNodeIndex)> {
151151
let lock = self.cache.lock_shard_by_hash(key.index() as u64);
152152
if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
153153
}
154154

155155
#[inline]
156-
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
156+
fn complete(&self, key: K, _key_hash: u64, value: V, index: DepNodeIndex) {
157157
let mut lock = self.cache.lock_shard_by_hash(key.index() as u64);
158158
lock.insert(key, (value, index));
159159
}

compiler/rustc_query_system/src/query/plumbing.rs

+58-24
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,27 @@ use crate::query::job::{report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobI
1212
use crate::query::SerializedDepNodeIndex;
1313
use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame};
1414
use crate::HandleCycleError;
15+
use hashbrown::hash_map::RawEntryMut;
1516
use rustc_data_structures::fingerprint::Fingerprint;
16-
use rustc_data_structures::fx::FxHashMap;
17-
use rustc_data_structures::sharded::Sharded;
17+
use rustc_data_structures::sharded::{self, Sharded};
1818
use rustc_data_structures::stack::ensure_sufficient_stack;
1919
use rustc_data_structures::sync::Lock;
2020
#[cfg(parallel_compiler)]
2121
use rustc_data_structures::{cold_path, sync};
2222
use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed, FatalError};
23+
use rustc_hash::FxHasher;
2324
use rustc_span::{Span, DUMMY_SP};
2425
use std::cell::Cell;
25-
use std::collections::hash_map::Entry;
2626
use std::fmt::Debug;
27+
use std::hash::BuildHasherDefault;
2728
use std::hash::Hash;
2829
use std::mem;
2930
use thin_vec::ThinVec;
3031

3132
use super::QueryConfig;
3233

3334
pub struct QueryState<K, D: DepKind> {
34-
active: Sharded<FxHashMap<K, QueryResult<D>>>,
35+
active: Sharded<hashbrown::HashMap<K, QueryResult<D>, BuildHasherDefault<FxHasher>>>,
3536
}
3637

3738
/// Indicates the state of a query for a given key in a query map.
@@ -143,7 +144,7 @@ where
143144
{
144145
/// Completes the query by updating the query cache with the `result`,
145146
/// signals the waiter and forgets the JobOwner, so it won't poison the query
146-
fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex)
147+
fn complete<C>(self, cache: &C, key_hash: u64, result: C::Value, dep_node_index: DepNodeIndex)
147148
where
148149
C: QueryCache<Key = K>,
149150
{
@@ -155,13 +156,17 @@ where
155156

156157
// Mark as complete before we remove the job from the active state
157158
// so no other thread can re-execute this query.
158-
cache.complete(key, result, dep_node_index);
159+
cache.complete(key, key_hash, result, dep_node_index);
159160

160161
let job = {
161-
let mut lock = state.active.lock_shard_by_value(&key);
162-
match lock.remove(&key).unwrap() {
163-
QueryResult::Started(job) => job,
164-
QueryResult::Poisoned => panic!(),
162+
let mut lock = state.active.lock_shard_by_hash(key_hash);
163+
164+
match lock.raw_entry_mut().from_key_hashed_nocheck(key_hash, &key) {
165+
RawEntryMut::Vacant(_) => panic!(),
166+
RawEntryMut::Occupied(occupied) => match occupied.remove() {
167+
QueryResult::Started(job) => job,
168+
QueryResult::Poisoned => panic!(),
169+
},
165170
}
166171
};
167172

@@ -211,7 +216,8 @@ where
211216
C: QueryCache,
212217
Tcx: DepContext,
213218
{
214-
match cache.lookup(&key) {
219+
let key_hash = sharded::make_hash(key);
220+
match cache.lookup(&key, key_hash) {
215221
Some((value, index)) => {
216222
tcx.profiler().query_cache_hit(index.into());
217223
tcx.dep_graph().read_index(index);
@@ -248,6 +254,7 @@ fn wait_for_query<Q, Qcx>(
248254
qcx: Qcx,
249255
span: Span,
250256
key: Q::Key,
257+
key_hash: u64,
251258
latch: QueryLatch<Qcx::DepKind>,
252259
current: Option<QueryJobId>,
253260
) -> (Q::Value, Option<DepNodeIndex>)
@@ -266,7 +273,7 @@ where
266273

267274
match result {
268275
Ok(()) => {
269-
let Some((v, index)) = query.query_cache(qcx).lookup(&key) else {
276+
let Some((v, index)) = query.query_cache(qcx).lookup(&key, key_hash) else {
270277
cold_path(|| {
271278
// We didn't find the query result in the query cache. Check if it was
272279
// poisoned due to a panic instead.
@@ -303,7 +310,8 @@ where
303310
Qcx: QueryContext,
304311
{
305312
let state = query.query_state(qcx);
306-
let mut state_lock = state.active.lock_shard_by_value(&key);
313+
let key_hash = sharded::make_hash(&key);
314+
let mut state_lock = state.active.lock_shard_by_hash(key_hash);
307315

308316
// For the parallel compiler we need to check both the query cache and query state structures
309317
// while holding the state lock to ensure that 1) the query has not yet completed and 2) the
@@ -312,29 +320,46 @@ where
312320
// executing, but another thread may have already completed the query and stores it result
313321
// in the query cache.
314322
if cfg!(parallel_compiler) && qcx.dep_context().sess().threads() > 1 {
315-
if let Some((value, index)) = query.query_cache(qcx).lookup(&key) {
323+
if let Some((value, index)) = query.query_cache(qcx).lookup(&key, key_hash) {
316324
qcx.dep_context().profiler().query_cache_hit(index.into());
317325
return (value, Some(index));
318326
}
319327
}
320328

321329
let current_job_id = qcx.current_query_job();
322330

323-
match state_lock.entry(key) {
324-
Entry::Vacant(entry) => {
331+
match state_lock.raw_table_mut().find_or_find_insert_slot(
332+
key_hash,
333+
|(k, _)| *k == key,
334+
|(k, _)| sharded::make_hash(k),
335+
) {
336+
Err(free_slot) => {
325337
// Nothing has computed or is computing the query, so we start a new job and insert it in the
326338
// state map.
327339
let id = qcx.next_job_id();
328340
let job = QueryJob::new(id, span, current_job_id);
329-
entry.insert(QueryResult::Started(job));
341+
342+
// SAFETY: The slot is still valid as there's
343+
// been no mutation to the table since we hold the lock.
344+
unsafe {
345+
state_lock.raw_table_mut().insert_in_slot(
346+
key_hash,
347+
free_slot,
348+
(key, QueryResult::Started(job)),
349+
);
350+
}
330351

331352
// Drop the lock before we start executing the query
332353
drop(state_lock);
333354

334-
execute_job::<_, _, INCR>(query, qcx, state, key, id, dep_node)
355+
execute_job::<_, _, INCR>(query, qcx, state, key, key_hash, id, dep_node)
335356
}
336-
Entry::Occupied(mut entry) => {
337-
match entry.get_mut() {
357+
Ok(bucket) => {
358+
// SAFETY: We know this bucket is still valid
359+
// since we just got it from `find_or_find_insert_slot`.
360+
let entry = unsafe { &mut bucket.as_mut().1 };
361+
362+
match entry {
338363
QueryResult::Started(job) => {
339364
#[cfg(parallel_compiler)]
340365
if sync::is_dyn_thread_safe() {
@@ -344,7 +369,15 @@ where
344369

345370
// Only call `wait_for_query` if we're using a Rayon thread pool
346371
// as it will attempt to mark the worker thread as blocked.
347-
return wait_for_query(query, qcx, span, key, latch, current_job_id);
372+
return wait_for_query(
373+
query,
374+
qcx,
375+
span,
376+
key,
377+
key_hash,
378+
latch,
379+
current_job_id,
380+
);
348381
}
349382

350383
let id = job.id;
@@ -366,6 +399,7 @@ fn execute_job<Q, Qcx, const INCR: bool>(
366399
qcx: Qcx,
367400
state: &QueryState<Q::Key, Qcx::DepKind>,
368401
key: Q::Key,
402+
key_hash: u64,
369403
id: QueryJobId,
370404
dep_node: Option<DepNode<Qcx::DepKind>>,
371405
) -> (Q::Value, Option<DepNodeIndex>)
@@ -397,7 +431,7 @@ where
397431
// This can't happen, as query feeding adds the very dependencies to the fed query
398432
// as its feeding query had. So if the fed query is red, so is its feeder, which will
399433
// get evaluated first, and re-feed the query.
400-
if let Some((cached_result, _)) = cache.lookup(&key) {
434+
if let Some((cached_result, _)) = cache.lookup(&key, key_hash) {
401435
let Some(hasher) = query.hash_result() else {
402436
panic!(
403437
"no_hash fed query later has its value computed.\n\
@@ -429,7 +463,7 @@ where
429463
}
430464
}
431465
}
432-
job_owner.complete(cache, result, dep_node_index);
466+
job_owner.complete(cache, key_hash, result, dep_node_index);
433467

434468
(result, Some(dep_node_index))
435469
}
@@ -832,7 +866,7 @@ pub fn force_query<Q, Qcx>(
832866
{
833867
// We may be concurrently trying both execute and force a query.
834868
// Ensure that only one of them runs the query.
835-
if let Some((_, index)) = query.query_cache(qcx).lookup(&key) {
869+
if let Some((_, index)) = query.query_cache(qcx).lookup(&key, sharded::make_hash(&key)) {
836870
qcx.dep_context().profiler().query_cache_hit(index.into());
837871
return;
838872
}

0 commit comments

Comments
 (0)