Skip to content

Commit 2807fd9

Browse files
committed
Split query execution into hot and cold paths
1 parent 3776f4b commit 2807fd9

File tree

3 files changed

+159
-105
lines changed

3 files changed

+159
-105
lines changed

src/librustc/dep_graph/graph.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,7 @@ impl CurrentDepGraph {
11221122
}
11231123

11241124
impl DepGraphData {
1125+
#[inline]
11251126
fn read_index(&self, source: DepNodeIndex) {
11261127
ty::tls::with_context_opt(|icx| {
11271128
let icx = if let Some(icx) = icx { icx } else { return };

src/librustc/ty/context.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,6 +1684,7 @@ pub mod tls {
16841684

16851685
/// Gets the pointer to the current `ImplicitCtxt`.
16861686
#[cfg(not(parallel_compiler))]
1687+
#[inline]
16871688
fn get_tlv() -> usize {
16881689
TLV.with(|tlv| tlv.get())
16891690
}

src/librustc/ty/query/plumbing.rs

Lines changed: 157 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@ use crate::ty::{self, TyCtxt};
1212
#[cfg(not(parallel_compiler))]
1313
use rustc_data_structures::cold_path;
1414
use rustc_data_structures::fx::{FxHashMap, FxHasher};
15-
#[cfg(parallel_compiler)]
16-
use rustc_data_structures::profiling::TimingGuard;
1715
use rustc_data_structures::sharded::Sharded;
18-
use rustc_data_structures::sync::Lock;
16+
use rustc_data_structures::sync::{Lock, LockGuard};
1917
use rustc_data_structures::thin_vec::ThinVec;
2018
use rustc_errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler, Level};
2119
use rustc_span::source_map::DUMMY_SP;
@@ -70,6 +68,13 @@ impl<'tcx, M: QueryConfig<'tcx>> Default for QueryCache<'tcx, M> {
7068
}
7169
}
7270

71+
/// The result of a query-cache lookup for a key: the shard index the key
72+
/// hashes to, plus the still-held lock guard for that shard's cache.
73+
pub(super) struct QueryLookup<'tcx, Q: QueryDescription<'tcx>> {
74+
shard: usize,
75+
lock: LockGuard<'tcx, QueryCache<'tcx, Q>>,
76+
}
77+
7378
/// A type representing the responsibility to execute the job in the `job` field.
7479
/// This will poison the relevant query if dropped.
7580
pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
@@ -87,113 +92,80 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
8792
/// This function is inlined because that results in a noticeable speed-up
8893
/// for some compile-time benchmarks.
8994
#[inline(always)]
90-
pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
91-
// Handling the `query_blocked_prof_timer` is a bit weird because of the
92-
// control flow in this function: Blocking is implemented by
93-
// awaiting a running job and, once that is done, entering the loop below
94-
// again from the top. In that second iteration we will hit the
95-
// cache which provides us with the information we need for
96-
// finishing the "query-blocked" event.
97-
//
98-
// We thus allocate `query_blocked_prof_timer` outside the loop,
99-
// initialize it during the first iteration and finish it during the
100-
// second iteration.
101-
#[cfg(parallel_compiler)]
102-
let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;
103-
104-
let cache = Q::query_cache(tcx);
105-
loop {
106-
// We compute the key's hash once and then use it for both the
107-
// shard lookup and the hashmap lookup. This relies on the fact
108-
// that both of them use `FxHasher`.
109-
let mut state = FxHasher::default();
110-
key.hash(&mut state);
111-
let key_hash = state.finish();
112-
113-
let shard = cache.get_shard_index_by_hash(key_hash);
114-
let mut lock_guard = cache.get_shard_by_index(shard).lock();
115-
let lock = &mut *lock_guard;
116-
117-
if let Some((_, value)) =
118-
lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
119-
{
120-
if unlikely!(tcx.prof.enabled()) {
121-
tcx.prof.query_cache_hit(value.index.into());
122-
123-
#[cfg(parallel_compiler)]
124-
{
125-
if let Some(prof_timer) = query_blocked_prof_timer.take() {
126-
prof_timer.finish_with_query_invocation_id(value.index.into());
127-
}
128-
}
129-
}
95+
pub(super) fn try_start(
96+
tcx: TyCtxt<'tcx>,
97+
span: Span,
98+
key: &Q::Key,
99+
mut lookup: QueryLookup<'tcx, Q>,
100+
) -> TryGetJob<'a, 'tcx, Q> {
101+
let lock = &mut *lookup.lock;
102+
103+
let (latch, mut _query_blocked_prof_timer) = match lock.active.entry((*key).clone()) {
104+
Entry::Occupied(mut entry) => {
105+
match entry.get_mut() {
106+
QueryResult::Started(job) => {
107+
// For parallel queries, we'll block and wait until the query running
108+
// in another thread has completed. Record how long we wait in the
109+
// self-profiler.
110+
let _query_blocked_prof_timer = if cfg!(parallel_compiler) {
111+
Some(tcx.prof.query_blocked())
112+
} else {
113+
None
114+
};
130115

131-
let result = (value.value.clone(), value.index);
132-
#[cfg(debug_assertions)]
133-
{
134-
lock.cache_hits += 1;
116+
// Create the id of the job we're waiting for
117+
let id = QueryJobId::new(job.id, lookup.shard, Q::dep_kind());
118+
119+
(job.latch(id), _query_blocked_prof_timer)
120+
}
121+
QueryResult::Poisoned => FatalError.raise(),
135122
}
136-
return TryGetJob::JobCompleted(result);
137123
}
124+
Entry::Vacant(entry) => {
125+
// No job entry for this query. Return a new one to be started later.
138126

139-
let latch = match lock.active.entry((*key).clone()) {
140-
Entry::Occupied(mut entry) => {
141-
match entry.get_mut() {
142-
QueryResult::Started(job) => {
143-
// For parallel queries, we'll block and wait until the query running
144-
// in another thread has completed. Record how long we wait in the
145-
// self-profiler.
146-
#[cfg(parallel_compiler)]
147-
{
148-
query_blocked_prof_timer = Some(tcx.prof.query_blocked());
149-
}
127+
// Generate an id unique within this shard.
128+
let id = lock.jobs.checked_add(1).unwrap();
129+
lock.jobs = id;
130+
let id = QueryShardJobId(NonZeroU32::new(id).unwrap());
150131

151-
// Create the id of the job we're waiting for
152-
let id = QueryJobId::new(job.id, shard, Q::dep_kind());
132+
let global_id = QueryJobId::new(id, lookup.shard, Q::dep_kind());
153133

154-
job.latch(id)
155-
}
156-
QueryResult::Poisoned => FatalError.raise(),
157-
}
158-
}
159-
Entry::Vacant(entry) => {
160-
// No job entry for this query. Return a new one to be started later.
161-
162-
// Generate an id unique within this shard.
163-
let id = lock.jobs.checked_add(1).unwrap();
164-
lock.jobs = id;
165-
let id = QueryShardJobId(NonZeroU32::new(id).unwrap());
134+
let job = tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));
166135

167-
let global_id = QueryJobId::new(id, shard, Q::dep_kind());
136+
entry.insert(QueryResult::Started(job));
168137

169-
let job =
170-
tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));
138+
let owner =
139+
JobOwner { cache: Q::query_cache(tcx), id: global_id, key: (*key).clone() };
140+
return TryGetJob::NotYetStarted(owner);
141+
}
142+
};
143+
mem::drop(lookup.lock);
171144

172-
entry.insert(QueryResult::Started(job));
145+
// If we are single-threaded we know that we have a cycle error,
146+
// so we just return the error.
147+
#[cfg(not(parallel_compiler))]
148+
return TryGetJob::Cycle(cold_path(|| {
149+
Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
150+
}));
173151

174-
let owner = JobOwner { cache, id: global_id, key: (*key).clone() };
175-
return TryGetJob::NotYetStarted(owner);
176-
}
177-
};
178-
mem::drop(lock_guard);
152+
// With parallel queries we might just have to wait on some other
153+
// thread.
154+
#[cfg(parallel_compiler)]
155+
{
156+
let result = latch.wait_on(tcx, span);
179157

180-
// If we are single-threaded we know that we have cycle error,
181-
// so we just return the error.
182-
#[cfg(not(parallel_compiler))]
183-
return TryGetJob::Cycle(cold_path(|| {
184-
Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
185-
}));
158+
if let Err(cycle) = result {
159+
return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
160+
}
186161

187-
// With parallel queries we might just have to wait on some other
188-
// thread.
189-
#[cfg(parallel_compiler)]
190-
{
191-
let result = latch.wait_on(tcx, span);
162+
let cached = tcx.try_get_cached::<Q>(key).0.unwrap();
192163

193-
if let Err(cycle) = result {
194-
return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
195-
}
164+
if let Some(prof_timer) = _query_blocked_prof_timer.take() {
165+
prof_timer.finish_with_query_invocation_id(cached.1.into());
196166
}
167+
168+
return TryGetJob::JobCompleted(cached);
197169
}
198170
}
199171

@@ -269,6 +241,7 @@ pub(super) enum TryGetJob<'a, 'tcx, D: QueryDescription<'tcx>> {
269241
/// The query was already completed.
270242
/// Returns the result of the query and its dep-node index
271243
/// if it succeeded or a cycle error if it failed.
244+
#[cfg(parallel_compiler)]
272245
JobCompleted((D::Value, DepNodeIndex)),
273246

274247
/// Trying to execute the query resulted in a cycle.
@@ -396,13 +369,78 @@ impl<'tcx> TyCtxt<'tcx> {
396369
eprintln!("end of query stack");
397370
}
398371

399-
#[inline(never)]
400-
pub(super) fn get_query<Q: QueryDescription<'tcx>>(self, span: Span, key: Q::Key) -> Q::Value {
372+
/// Checks whether the query identified by `key` is already computed and
373+
/// cached. Returns the cached value and dep-node index on a hit, together
374+
/// with a `QueryLookup` that keeps the shard lock held, so a cache miss
375+
/// can proceed to `try_start` without re-hashing or re-locking.
376+
///
377+
/// This function is inlined because that results in a noticeable speed-up
378+
/// for some compile-time benchmarks.
379+
#[inline]
380+
fn try_get_cached<Q: QueryDescription<'tcx>>(
381+
self,
382+
key: &Q::Key,
383+
) -> (Option<(Q::Value, DepNodeIndex)>, QueryLookup<'tcx, Q>) {
384+
let cache = Q::query_cache(self);
385+
386+
// We compute the key's hash once and then use it for both the
387+
// shard lookup and the hashmap lookup. This relies on the fact
388+
// that both of them use `FxHasher`.
389+
let mut state = FxHasher::default();
390+
key.hash(&mut state);
391+
let key_hash = state.finish();
392+
393+
let shard = cache.get_shard_index_by_hash(key_hash);
394+
let mut lock_guard = cache.get_shard_by_index(shard).lock();
395+
let lock = &mut *lock_guard;
396+
let cache_hits = &mut lock.cache_hits;
397+
398+
let result =
399+
lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key).map(|(_, value)| {
400+
if unlikely!(self.prof.enabled()) {
401+
self.prof.query_cache_hit(value.index.into());
402+
}
403+
404+
#[cfg(debug_assertions)]
405+
{
406+
*cache_hits += 1;
407+
}
408+
409+
(value.value.clone(), value.index)
410+
});
411+
412+
(result, QueryLookup { lock: lock_guard, shard })
413+
}
414+
415+
#[inline]
416+
pub(super) fn get_query<Q: QueryDescription<'tcx> + 'tcx>(
417+
self,
418+
span: Span,
419+
key: Q::Key,
420+
) -> Q::Value {
401421
debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME, key, span);
402422

403-
let job = match JobOwner::try_get(self, span, &key) {
423+
let (cached, lookup) = self.try_get_cached::<Q>(&key);
424+
425+
if let Some((v, index)) = cached {
426+
self.dep_graph.read_index(index);
427+
return v;
428+
}
429+
430+
self.try_execute_query(span, key, lookup)
431+
}
432+
433+
#[inline(never)]
434+
pub(super) fn try_execute_query<Q: QueryDescription<'tcx>>(
435+
self,
436+
span: Span,
437+
key: Q::Key,
438+
lookup: QueryLookup<'tcx, Q>,
439+
) -> Q::Value {
440+
let job = match JobOwner::try_start(self, span, &key, lookup) {
404441
TryGetJob::NotYetStarted(job) => job,
405442
TryGetJob::Cycle(result) => return result,
443+
#[cfg(parallel_compiler)]
406444
TryGetJob::JobCompleted((v, index)) => {
407445
self.dep_graph.read_index(index);
408446
return v;
@@ -615,7 +653,7 @@ impl<'tcx> TyCtxt<'tcx> {
615653
/// side-effects -- e.g., in order to report errors for erroneous programs.
616654
///
617655
/// Note: The optimization is only available during incr. comp.
618-
pub(super) fn ensure_query<Q: QueryDescription<'tcx>>(self, key: Q::Key) -> () {
656+
pub(super) fn ensure_query<Q: QueryDescription<'tcx> + 'tcx>(self, key: Q::Key) -> () {
619657
if Q::EVAL_ALWAYS {
620658
let _ = self.get_query::<Q>(DUMMY_SP, key);
621659
return;
@@ -643,12 +681,26 @@ impl<'tcx> TyCtxt<'tcx> {
643681
}
644682

645683
#[allow(dead_code)]
646-
fn force_query<Q: QueryDescription<'tcx>>(self, key: Q::Key, span: Span, dep_node: DepNode) {
684+
fn force_query<Q: QueryDescription<'tcx> + 'tcx>(
685+
self,
686+
key: Q::Key,
687+
span: Span,
688+
dep_node: DepNode,
689+
) {
647690
// We may be concurrently trying both execute and force a query.
648691
// Ensure that only one of them runs the query.
649-
let job = match JobOwner::try_get(self, span, &key) {
692+
693+
let (cached, lookup) = self.try_get_cached::<Q>(&key);
694+
695+
if cached.is_some() {
696+
return;
697+
}
698+
699+
let job = match JobOwner::try_start(self, span, &key, lookup) {
650700
TryGetJob::NotYetStarted(job) => job,
651-
TryGetJob::Cycle(_) | TryGetJob::JobCompleted(_) => return,
701+
TryGetJob::Cycle(_) => return,
702+
#[cfg(parallel_compiler)]
703+
TryGetJob::JobCompleted(_) => return,
652704
};
653705
self.force_query_with_job::<Q>(key, job, dep_node);
654706
}
@@ -1065,7 +1117,7 @@ macro_rules! define_queries_inner {
10651117
}
10661118

10671119
$($(#[$attr])*
1068-
#[inline(always)]
1120+
#[inline]
10691121
pub fn $name(self, key: $K) -> $V {
10701122
self.at(DUMMY_SP).$name(key)
10711123
})*
@@ -1102,7 +1154,7 @@ macro_rules! define_queries_inner {
11021154

11031155
impl TyCtxtAt<$tcx> {
11041156
$($(#[$attr])*
1105-
#[inline(always)]
1157+
#[inline]
11061158
pub fn $name(self, key: $K) -> $V {
11071159
self.tcx.get_query::<queries::$name<'_>>(self.span, key)
11081160
})*

0 commit comments

Comments
 (0)