Skip to content

Commit 8f741b9

Browse files
committed
[traverse-tree] multi-threaded gathering of unique objects
1 parent 3f9ee23 commit 8f741b9

File tree

4 files changed

+156
-59
lines changed

4 files changed

+156
-59
lines changed

Cargo.lock

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

experiments/traversal/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ git-odb = { version = "^0.14", path = "../../git-odb", features = ["pack-cache-l
1313
git-traverse = { version = "0.1", path = "../../git-traverse" }
1414
git-hash = { version = "^0.3", path = "../../git-hash" }
1515
git-object = { version = "^0.8", path = "../../git-object" }
16-
git-diff = { version = "^0.2", path = "../../git-diff" }
1716
git2 = "0.13"
1817
rayon = "1.5.0"
19-
memory-lru = "0.1.0"
18+
dashmap = "4.0.2"

experiments/traversal/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ An experiment to see how fast we can traverse
66
and compare that to libgit2.
77

88
This is relevant for learning which objects to add to a pack in case of a clone.
9+
[Here is the discussion with additional performance data](https://github.com/Byron/gitoxide/discussions/76).

experiments/traversal/src/main.rs

Lines changed: 153 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use anyhow::anyhow;
2+
use dashmap::DashSet;
23
use git_hash::{bstr::BStr, bstr::ByteSlice, ObjectId};
34
use git_object::immutable::tree::Entry;
45
use git_odb::Find;
@@ -65,13 +66,33 @@ fn main() -> anyhow::Result<()> {
6566
);
6667

6768
let start = Instant::now();
68-
let (unique, entries) = do_gitoxide_tree_dag_traversal(&all_commits, &db, || {
69-
git_odb::pack::cache::lru::MemoryCappedHashmap::new(GITOXIDE_CACHED_OBJECT_DATA_PER_THREAD_IN_BYTES)
70-
})?;
69+
let (unique, entries) = do_gitoxide_tree_dag_traversal(
70+
&all_commits,
71+
&db,
72+
|| git_odb::pack::cache::lru::StaticLinkedList::<64>::default(),
73+
Computation::MultiThreaded,
74+
)?;
7175
let elapsed = start.elapsed();
7276
println!(
73-
"gitoxide (cache = {:.0}MB): confirmed {} entries ({} unique objects) in {} trees in {:?} ({:0.0} entries/s, {:0.0} trees/s)",
74-
GITOXIDE_CACHED_OBJECT_DATA_PER_THREAD_IN_BYTES as f32 / (1024 * 1024) as f32,
77+
"gitoxide PARALLEL (cache = 64 entries: confirmed {} entries ({} unique objects) in {} trees in {:?} ({:0.0} entries/s, {:0.0} trees/s)",
78+
entries,
79+
unique,
80+
all_commits.len(),
81+
elapsed,
82+
entries as f32 / elapsed.as_secs_f32(),
83+
all_commits.len() as f32 / elapsed.as_secs_f32()
84+
);
85+
86+
let start = Instant::now();
87+
let (unique, entries) = do_gitoxide_tree_dag_traversal(
88+
&all_commits,
89+
&db,
90+
|| git_odb::pack::cache::lru::StaticLinkedList::<64>::default(),
91+
Computation::SingleThreaded,
92+
)?;
93+
let elapsed = start.elapsed();
94+
println!(
95+
"gitoxide (cache = 64 entries: confirmed {} entries ({} unique objects) in {} trees in {:?} ({:0.0} entries/s, {:0.0} trees/s)",
7596
entries,
7697
unique,
7798
all_commits.len(),
@@ -172,69 +193,146 @@ where
172193
Ok(commits)
173194
}
174195

196+
enum Computation {
197+
SingleThreaded,
198+
MultiThreaded,
199+
}
200+
175201
fn do_gitoxide_tree_dag_traversal<C>(
176202
commits: &[ObjectId],
177203
db: &git_odb::linked::Db,
178-
new_cache: impl FnOnce() -> C,
204+
new_cache: impl Fn() -> C + Sync + Send,
205+
mode: Computation,
179206
) -> anyhow::Result<(usize, u64)>
180207
where
181208
C: git_odb::pack::cache::DecodeEntry,
182209
{
183-
#[derive(Default)]
184-
struct Count {
185-
entries: usize,
186-
seen: HashSet<ObjectId>,
187-
}
210+
match mode {
211+
Computation::SingleThreaded => {
212+
#[derive(Default)]
213+
struct Count {
214+
entries: usize,
215+
seen: HashSet<ObjectId>,
216+
}
188217

189-
impl tree::visit::Visit for Count {
190-
type PathId = ();
191-
fn set_current_path(&mut self, _id: Self::PathId) {}
192-
fn push_tracked_path_component(&mut self, _component: &BStr) -> Self::PathId {}
193-
fn push_path_component(&mut self, _component: &BStr) {}
194-
fn pop_path_component(&mut self) {}
195-
fn visit_tree(&mut self, entry: &Entry<'_>) -> Action {
196-
self.entries += 1;
197-
let inserted = self.seen.insert(entry.oid.to_owned());
198-
if !inserted {
199-
tree::visit::Action::Skip
200-
} else {
201-
tree::visit::Action::Continue
218+
impl tree::visit::Visit for Count {
219+
type PathId = ();
220+
fn set_current_path(&mut self, _id: Self::PathId) {}
221+
fn push_tracked_path_component(&mut self, _component: &BStr) -> Self::PathId {}
222+
fn push_path_component(&mut self, _component: &BStr) {}
223+
fn pop_path_component(&mut self) {}
224+
fn visit_tree(&mut self, entry: &Entry<'_>) -> Action {
225+
self.entries += 1;
226+
let inserted = self.seen.insert(entry.oid.to_owned());
227+
if !inserted {
228+
tree::visit::Action::Skip
229+
} else {
230+
tree::visit::Action::Continue
231+
}
232+
}
233+
fn visit_nontree(&mut self, entry: &Entry<'_>) -> Action {
234+
self.entries += 1;
235+
self.seen.insert(entry.oid.to_owned());
236+
tree::visit::Action::Continue
237+
}
202238
}
203-
}
204-
fn visit_nontree(&mut self, entry: &Entry<'_>) -> Action {
205-
self.entries += 1;
206-
self.seen.insert(entry.oid.to_owned());
207-
tree::visit::Action::Continue
208-
}
209-
}
210239

211-
let mut cache = new_cache();
212-
let mut buf = Vec::new();
213-
let mut seen = HashSet::new();
214-
let mut entries = 0;
240+
let mut cache = new_cache();
241+
let mut buf = Vec::new();
242+
let mut state = tree::breadthfirst::State::default();
243+
let mut seen = HashSet::new();
244+
let mut entries = 0;
215245

216-
for commit in commits {
217-
let tid = db
218-
.find(commit, &mut buf, &mut cache)?
219-
.and_then(|o| o.into_commit_iter().and_then(|mut c| c.tree_id()))
220-
.expect("commit as starting point");
246+
for commit in commits {
247+
let tid = db
248+
.find(commit, &mut buf, &mut cache)?
249+
.and_then(|o| o.into_commit_iter().and_then(|mut c| c.tree_id()))
250+
.expect("commit as starting point");
251+
252+
let mut count = Count { entries: 0, seen };
253+
tree::breadthfirst::traverse(
254+
tid,
255+
&mut state,
256+
|oid, buf| {
257+
db.find(oid, buf, &mut cache)
258+
.ok()
259+
.flatten()
260+
.and_then(|o| o.into_tree_iter())
261+
},
262+
&mut count,
263+
)?;
264+
entries += count.entries as u64;
265+
seen = count.seen;
266+
}
267+
Ok((seen.len(), entries))
268+
}
269+
Computation::MultiThreaded => {
270+
struct Count<'a> {
271+
entries: usize,
272+
seen: &'a DashSet<ObjectId>,
273+
}
221274

222-
let mut count = Count { entries: 0, seen };
223-
tree::breadthfirst::traverse(
224-
tid,
225-
tree::breadthfirst::State::default(),
226-
|oid, buf| {
227-
db.find(oid, buf, &mut cache)
228-
.ok()
229-
.flatten()
230-
.and_then(|o| o.into_tree_iter())
231-
},
232-
&mut count,
233-
)?;
234-
entries += count.entries as u64;
235-
seen = count.seen;
275+
impl<'a> tree::visit::Visit for Count<'a> {
276+
type PathId = ();
277+
fn set_current_path(&mut self, _id: Self::PathId) {}
278+
fn push_tracked_path_component(&mut self, _component: &BStr) -> Self::PathId {}
279+
fn push_path_component(&mut self, _component: &BStr) {}
280+
fn pop_path_component(&mut self) {}
281+
fn visit_tree(&mut self, entry: &Entry<'_>) -> Action {
282+
self.entries += 1;
283+
let inserted = self.seen.insert(entry.oid.to_owned());
284+
if !inserted {
285+
tree::visit::Action::Skip
286+
} else {
287+
tree::visit::Action::Continue
288+
}
289+
}
290+
fn visit_nontree(&mut self, entry: &Entry<'_>) -> Action {
291+
self.entries += 1;
292+
self.seen.insert(entry.oid.to_owned());
293+
tree::visit::Action::Continue
294+
}
295+
}
296+
use rayon::prelude::*;
297+
let seen = DashSet::new();
298+
let entries = std::sync::atomic::AtomicU64::new(0);
299+
300+
commits
301+
.into_par_iter()
302+
.try_for_each_init::<_, _, _, anyhow::Result<_>>(
303+
{
304+
let new_cache = &new_cache;
305+
let seen = &seen;
306+
move || {
307+
(
308+
Count {
309+
entries: 0,
310+
seen: &seen,
311+
},
312+
Vec::<u8>::new(),
313+
new_cache(),
314+
tree::breadthfirst::State::default(),
315+
)
316+
}
317+
},
318+
|(count, buf, cache, state), commit| {
319+
let tid = db
320+
.find(commit, buf, cache)?
321+
.and_then(|o| o.into_commit_iter().and_then(|mut c| c.tree_id()))
322+
.expect("commit as starting point");
323+
tree::breadthfirst::traverse(
324+
tid,
325+
state,
326+
|oid, buf| db.find(oid, buf, cache).ok().flatten().and_then(|o| o.into_tree_iter()),
327+
count,
328+
)?;
329+
entries.fetch_add(count.entries as u64, std::sync::atomic::Ordering::Relaxed);
330+
Ok(())
331+
},
332+
)?;
333+
Ok((seen.len(), entries.load(std::sync::atomic::Ordering::Acquire)))
334+
}
236335
}
237-
Ok((seen.len(), entries))
238336
}
239337

240338
fn do_libgit2_tree_dag_traversal(commits: &[ObjectId], db: &git2::Repository) -> anyhow::Result<(usize, u64)> {

0 commit comments

Comments
 (0)