Skip to content

Commit 6c5d278

Browse files
committed
Auto merge of rust-lang#116375 - Zoxc:lazy-index, r=<try>
Create the previous dep graph index on a background thread. This changes `SerializedDepGraph.index` to be computed on-demand per dep kind. This means we can immediately start using queries without waiting for the entire index to be constructed. Additionally, a background thread is started which computes the entire index, effectively off-loading most of the index construction to the background thread. <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></td><td colspan="2"><b>After</b></td><td colspan="1"><b>Before</b></td><td colspan="2"><b>After</b></td></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</td><td align="right">Memory</td><td align="right">Memory</td><td align="right">%</td></tr><tr><td>🟣 <b>clap</b>:check:unchanged</td><td align="right">0.4259s</td><td align="right">0.4225s</td><td align="right"> -0.79%</td><td align="right">89.65 MiB</td><td align="right">90.08 MiB</td><td align="right"> 0.48%</td></tr><tr><td>🟣 <b>hyper</b>:check:unchanged</td><td align="right">0.1425s</td><td align="right">0.1417s</td><td align="right"> -0.53%</td><td align="right">47.85 MiB</td><td align="right">47.91 MiB</td><td align="right"> 0.13%</td></tr><tr><td>🟣 <b>regex</b>:check:unchanged</td><td align="right">0.3188s</td><td align="right">0.3157s</td><td align="right"> -0.97%</td><td align="right">71.09 MiB</td><td align="right">71.58 MiB</td><td align="right"> 0.69%</td></tr><tr><td>🟣 <b>syn</b>:check:unchanged</td><td align="right">0.5895s</td><td align="right">0.5813s</td><td align="right">💚 -1.38%</td><td align="right">101.68 MiB</td><td align="right">102.15 MiB</td><td align="right"> 0.47%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check:unchanged</td><td align="right">1.4392s</td><td align="right">1.4361s</td><td align="right"> -0.22%</td><td align="right">200.62 MiB</td><td align="right">201.68 MiB</td><td align="right"> 0.53%</td></tr><tr><td>Total</td><td align="right">2.9158s</td><td align="right">2.8974s</td><td 
align="right"> -0.63%</td><td align="right">510.89 MiB</td><td align="right">513.40 MiB</td><td align="right"> 0.49%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">0.9922s</td><td align="right"> -0.78%</td><td align="right">1 byte</td><td align="right">1.00 bytes</td><td align="right"> 0.46%</td></tr></table> <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></td><td colspan="2"><b>After</b></td><td colspan="1"><b>Before</b></td><td colspan="2"><b>After</b></td></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</td><td align="right">Memory</td><td align="right">Memory</td><td align="right">%</td></tr><tr><td>🟠 <b>clap</b>:debug:unchanged</td><td align="right">1.0753s</td><td align="right">1.0684s</td><td align="right"> -0.64%</td><td align="right">142.80 MiB</td><td align="right">142.72 MiB</td><td align="right"> -0.05%</td></tr><tr><td>🟠 <b>hyper</b>:debug:unchanged</td><td align="right">0.2857s</td><td align="right">0.2847s</td><td align="right"> -0.35%</td><td align="right">63.06 MiB</td><td align="right">63.15 MiB</td><td align="right"> 0.13%</td></tr><tr><td>🟠 <b>regex</b>:debug:unchanged</td><td align="right">0.7703s</td><td align="right">0.7633s</td><td align="right"> -0.90%</td><td align="right">108.76 MiB</td><td align="right">109.03 MiB</td><td align="right"> 0.25%</td></tr><tr><td>🟠 <b>syn</b>:debug:unchanged</td><td align="right">1.0596s</td><td align="right">1.0531s</td><td align="right"> -0.62%</td><td align="right">142.08 MiB</td><td align="right">142.18 MiB</td><td align="right"> 0.07%</td></tr><tr><td>🟠 <b>syntex_syntax</b>:debug:unchanged</td><td align="right">2.7530s</td><td align="right">2.7274s</td><td align="right"> -0.93%</td><td align="right">308.92 MiB</td><td align="right">308.63 MiB</td><td align="right"> -0.09%</td></tr><tr><td>Total</td><td align="right">5.9438s</td><td align="right">5.8969s</td><td align="right"> -0.79%</td><td align="right">765.62 
MiB</td><td align="right">765.71 MiB</td><td align="right"> 0.01%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">0.9931s</td><td align="right"> -0.69%</td><td align="right">1 byte</td><td align="right">1.00 bytes</td><td align="right"> 0.06%</td></tr></table> r? `@cjgillot`
2 parents 52daa7d + 6fd274a commit 6c5d278

File tree

2 files changed

+113
-21
lines changed

2 files changed

+113
-21
lines changed

compiler/rustc_incremental/src/persist/load.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc<SerializedDepGraph>, WorkPr
171171
return LoadResult::DataOutOfDate;
172172
}
173173

174-
let dep_graph = SerializedDepGraph::decode::<DepsType>(&mut decoder);
174+
let dep_graph = SerializedDepGraph::decode::<DepsType>(&mut decoder, sess);
175175

176176
LoadResult::Ok { data: (dep_graph, prev_work_products) }
177177
}

compiler/rustc_query_system/src/dep_graph/serialized.rs

+112-20
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,20 @@
3535
//! If the number of edges in this node does not fit in the bits available in the header, we
3636
//! store it directly after the header with leb128.
3737
38-
use std::iter;
3938
use std::marker::PhantomData;
40-
use std::sync::Arc;
39+
use std::sync::{Arc, OnceLock};
40+
use std::{iter, thread};
4141

4242
use rustc_data_structures::fingerprint::{Fingerprint, PackedFingerprint};
4343
use rustc_data_structures::fx::FxHashMap;
44-
use rustc_data_structures::outline;
4544
use rustc_data_structures::profiling::SelfProfilerRef;
4645
use rustc_data_structures::sync::Lock;
4746
use rustc_data_structures::unhash::UnhashMap;
47+
use rustc_data_structures::{jobserver, outline};
4848
use rustc_index::{Idx, IndexVec};
4949
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
5050
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
51+
use rustc_session::Session;
5152
use tracing::{debug, instrument};
5253

5354
use super::query::DepGraphQuery;
@@ -74,23 +75,47 @@ const DEP_NODE_PAD: usize = DEP_NODE_SIZE - 1;
7475
const DEP_NODE_WIDTH_BITS: usize = DEP_NODE_SIZE / 2;
7576

7677
/// Data for use when recompiling the **current crate**.
77-
#[derive(Debug, Default)]
7878
pub struct SerializedDepGraph {
7979
/// The set of all DepNodes in the graph
8080
nodes: IndexVec<SerializedDepNodeIndex, DepNode>,
81+
8182
/// The set of all Fingerprints in the graph. Each Fingerprint corresponds to
8283
/// the DepNode at the same index in the nodes vector.
8384
fingerprints: IndexVec<SerializedDepNodeIndex, Fingerprint>,
85+
8486
/// For each DepNode, stores the list of edges originating from that
8587
/// DepNode. Encoded as a [start, end) pair indexing into edge_list_data,
8688
/// which holds the actual DepNodeIndices of the target nodes.
8789
edge_list_indices: IndexVec<SerializedDepNodeIndex, EdgeHeader>,
90+
8891
/// A flattened list of all edge targets in the graph, stored in the same
8992
/// varint encoding that we use on disk. Edge sources are implicit in edge_list_indices.
9093
edge_list_data: Vec<u8>,
94+
9195
/// Stores a map from fingerprints to nodes per dep node kind.
92-
/// This is the reciprocal of `nodes`.
93-
index: Vec<UnhashMap<PackedFingerprint, SerializedDepNodeIndex>>,
96+
/// This is the reciprocal of `nodes`. This is computed on demand for each dep kind.
97+
/// The entire index is also computed in a background thread.
98+
index: Vec<OnceLock<UnhashMap<PackedFingerprint, SerializedDepNodeIndex>>>,
99+
100+
/// Stores the number of nodes for each dep node kind.
101+
index_sizes: Vec<usize>,
102+
103+
/// A profiler reference for use in the index prefetching thread.
104+
prof: SelfProfilerRef,
105+
}
106+
107+
impl Default for SerializedDepGraph {
108+
fn default() -> Self {
109+
SerializedDepGraph {
110+
nodes: Default::default(),
111+
fingerprints: Default::default(),
112+
edge_list_indices: Default::default(),
113+
edge_list_data: Default::default(),
114+
index: Default::default(),
115+
index_sizes: Default::default(),
116+
prof: SelfProfilerRef::new(None, None),
117+
}
118+
}
94119
}
95120

96121
impl SerializedDepGraph {
@@ -131,9 +156,35 @@ impl SerializedDepGraph {
131156
self.nodes[dep_node_index]
132157
}
133158

159+
/// This computes and sets up the index for just the specified `DepKind`.
160+
fn setup_index(&self, dep_kind: DepKind) {
161+
let _timer = self.prof.generic_activity("incr_comp_dep_graph_setup_index");
162+
163+
let mut index = UnhashMap::with_capacity_and_hasher(
164+
self.index_sizes[dep_kind.as_usize()],
165+
Default::default(),
166+
);
167+
168+
for (idx, node) in self.nodes.iter_enumerated() {
169+
if node.kind == dep_kind {
170+
index.insert(node.hash, idx);
171+
}
172+
}
173+
174+
// This may race with the prefetching thread, but that will set the same value.
175+
self.index[dep_kind.as_usize()].set(index).ok();
176+
}
177+
134178
#[inline]
135179
pub fn node_to_index_opt(&self, dep_node: &DepNode) -> Option<SerializedDepNodeIndex> {
136-
self.index.get(dep_node.kind.as_usize())?.get(&dep_node.hash).cloned()
180+
let index = self.index.get(dep_node.kind.as_usize())?;
181+
let index = index.get().unwrap_or_else(|| {
182+
outline(|| {
183+
self.setup_index(dep_node.kind);
184+
self.index[dep_node.kind.as_usize()].get().unwrap()
185+
})
186+
});
187+
index.get(&dep_node.hash).cloned()
137188
}
138189

139190
#[inline]
@@ -145,6 +196,48 @@ impl SerializedDepGraph {
145196
pub fn node_count(&self) -> usize {
146197
self.nodes.len()
147198
}
199+
200+
fn prefetch(&self) {
201+
let _timer = self.prof.generic_activity("incr_comp_prefetch_dep_graph_index");
202+
203+
let mut index: Vec<_> = self
204+
.index_sizes
205+
.iter()
206+
.map(|&n| UnhashMap::with_capacity_and_hasher(n, Default::default()))
207+
.collect();
208+
209+
// Use a single loop to build indices for all kinds, unlike `setup_index` which builds
210+
// a single index for each loop over the nodes.
211+
for (idx, node) in self.nodes.iter_enumerated() {
212+
index[node.kind.as_usize()].insert(node.hash, idx);
213+
}
214+
215+
for (i, index) in index.into_iter().enumerate() {
216+
// This may race with `setup_index`, but that will set the same value.
217+
self.index[i].set(index).ok();
218+
}
219+
}
220+
221+
/// This spawns a thread that prefetches the index.
222+
fn spawn_prefetch_thread(self: &Arc<Self>) {
223+
if !self.index.is_empty() {
224+
let client = jobserver::client();
225+
// This should ideally use `try_acquire` to avoid races on the tokens,
226+
// but the jobserver crate doesn't support that operation.
227+
if let Ok(tokens) = client.available()
228+
&& tokens > 0
229+
{
230+
let this = Arc::clone(self);
231+
thread::spawn(move || {
232+
let _token = client.acquire();
233+
this.prefetch();
234+
});
235+
} else {
236+
// Prefetch the index on the current thread if we don't have a token available.
237+
self.prefetch();
238+
}
239+
}
240+
}
148241
}
149242

150243
/// A packed representation of an edge's start index and byte width.
@@ -179,8 +272,8 @@ fn mask(bits: usize) -> usize {
179272
}
180273

181274
impl SerializedDepGraph {
182-
#[instrument(level = "debug", skip(d))]
183-
pub fn decode<D: Deps>(d: &mut MemDecoder<'_>) -> Arc<SerializedDepGraph> {
275+
#[instrument(level = "debug", skip(d, sess))]
276+
pub fn decode<D: Deps>(d: &mut MemDecoder<'_>, sess: &Session) -> Arc<SerializedDepGraph> {
184277
// The last 16 bytes are the node count and edge count.
185278
debug!("position: {:?}", d.position());
186279
let (node_count, edge_count) =
@@ -246,22 +339,21 @@ impl SerializedDepGraph {
246339
// end of the array. This padding ensures it doesn't.
247340
edge_list_data.extend(&[0u8; DEP_NODE_PAD]);
248341

249-
// Read the number of each dep kind and use it to create a hash map with a suitable size.
250-
let mut index: Vec<_> = (0..(D::DEP_KIND_MAX + 1))
251-
.map(|_| UnhashMap::with_capacity_and_hasher(d.read_u32() as usize, Default::default()))
252-
.collect();
342+
// Read the number of nodes for each dep kind.
343+
let index_sizes: Vec<_> =
344+
(0..(D::DEP_KIND_MAX + 1)).map(|_| d.read_u32() as usize).collect();
253345

254-
for (idx, node) in nodes.iter_enumerated() {
255-
index[node.kind.as_usize()].insert(node.hash, idx);
256-
}
257-
258-
Arc::new(SerializedDepGraph {
346+
let result = Arc::new(SerializedDepGraph {
259347
nodes,
260348
fingerprints,
261349
edge_list_indices,
262350
edge_list_data,
263-
index,
264-
})
351+
index: (0..index_sizes.len()).map(|_| OnceLock::new()).collect(),
352+
index_sizes,
353+
prof: sess.prof.clone(),
354+
});
355+
result.spawn_prefetch_thread();
356+
result
265357
}
266358
}
267359

0 commit comments

Comments
 (0)