|
1 | 1 | use anyhow::anyhow;
|
| 2 | +use dashmap::DashSet; |
2 | 3 | use git_hash::{bstr::BStr, bstr::ByteSlice, ObjectId};
|
3 | 4 | use git_object::immutable::tree::Entry;
|
4 | 5 | use git_odb::Find;
|
@@ -65,13 +66,33 @@ fn main() -> anyhow::Result<()> {
|
65 | 66 | );
|
66 | 67 |
|
67 | 68 | let start = Instant::now();
|
68 |
| - let (unique, entries) = do_gitoxide_tree_dag_traversal(&all_commits, &db, || { |
69 |
| - git_odb::pack::cache::lru::MemoryCappedHashmap::new(GITOXIDE_CACHED_OBJECT_DATA_PER_THREAD_IN_BYTES) |
70 |
| - })?; |
| 69 | + let (unique, entries) = do_gitoxide_tree_dag_traversal( |
| 70 | + &all_commits, |
| 71 | + &db, |
| 72 | + || git_odb::pack::cache::lru::StaticLinkedList::<64>::default(), |
| 73 | + Computation::MultiThreaded, |
| 74 | + )?; |
71 | 75 | let elapsed = start.elapsed();
|
72 | 76 | println!(
|
73 |
| - "gitoxide (cache = {:.0}MB): confirmed {} entries ({} unique objects) in {} trees in {:?} ({:0.0} entries/s, {:0.0} trees/s)", |
74 |
| - GITOXIDE_CACHED_OBJECT_DATA_PER_THREAD_IN_BYTES as f32 / (1024 * 1024) as f32, |
| 77 | + "gitoxide PARALLEL (cache = 64 entries: confirmed {} entries ({} unique objects) in {} trees in {:?} ({:0.0} entries/s, {:0.0} trees/s)", |
| 78 | + entries, |
| 79 | + unique, |
| 80 | + all_commits.len(), |
| 81 | + elapsed, |
| 82 | + entries as f32 / elapsed.as_secs_f32(), |
| 83 | + all_commits.len() as f32 / elapsed.as_secs_f32() |
| 84 | + ); |
| 85 | + |
| 86 | + let start = Instant::now(); |
| 87 | + let (unique, entries) = do_gitoxide_tree_dag_traversal( |
| 88 | + &all_commits, |
| 89 | + &db, |
| 90 | + || git_odb::pack::cache::lru::StaticLinkedList::<64>::default(), |
| 91 | + Computation::SingleThreaded, |
| 92 | + )?; |
| 93 | + let elapsed = start.elapsed(); |
| 94 | + println!( |
| 95 | + "gitoxide (cache = 64 entries: confirmed {} entries ({} unique objects) in {} trees in {:?} ({:0.0} entries/s, {:0.0} trees/s)", |
75 | 96 | entries,
|
76 | 97 | unique,
|
77 | 98 | all_commits.len(),
|
@@ -172,69 +193,146 @@ where
|
172 | 193 | Ok(commits)
|
173 | 194 | }
|
174 | 195 |
|
/// Selects how `do_gitoxide_tree_dag_traversal` walks the commit trees.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Computation {
    /// Traverse all commits one after another on the calling thread.
    SingleThreaded,
    /// Distribute the commits across a rayon thread pool.
    MultiThreaded,
}
| 200 | + |
175 | 201 | fn do_gitoxide_tree_dag_traversal<C>(
|
176 | 202 | commits: &[ObjectId],
|
177 | 203 | db: &git_odb::linked::Db,
|
178 |
| - new_cache: impl FnOnce() -> C, |
| 204 | + new_cache: impl Fn() -> C + Sync + Send, |
| 205 | + mode: Computation, |
179 | 206 | ) -> anyhow::Result<(usize, u64)>
|
180 | 207 | where
|
181 | 208 | C: git_odb::pack::cache::DecodeEntry,
|
182 | 209 | {
|
183 |
| - #[derive(Default)] |
184 |
| - struct Count { |
185 |
| - entries: usize, |
186 |
| - seen: HashSet<ObjectId>, |
187 |
| - } |
| 210 | + match mode { |
| 211 | + Computation::SingleThreaded => { |
| 212 | + #[derive(Default)] |
| 213 | + struct Count { |
| 214 | + entries: usize, |
| 215 | + seen: HashSet<ObjectId>, |
| 216 | + } |
188 | 217 |
|
189 |
| - impl tree::visit::Visit for Count { |
190 |
| - type PathId = (); |
191 |
| - fn set_current_path(&mut self, _id: Self::PathId) {} |
192 |
| - fn push_tracked_path_component(&mut self, _component: &BStr) -> Self::PathId {} |
193 |
| - fn push_path_component(&mut self, _component: &BStr) {} |
194 |
| - fn pop_path_component(&mut self) {} |
195 |
| - fn visit_tree(&mut self, entry: &Entry<'_>) -> Action { |
196 |
| - self.entries += 1; |
197 |
| - let inserted = self.seen.insert(entry.oid.to_owned()); |
198 |
| - if !inserted { |
199 |
| - tree::visit::Action::Skip |
200 |
| - } else { |
201 |
| - tree::visit::Action::Continue |
| 218 | + impl tree::visit::Visit for Count { |
| 219 | + type PathId = (); |
| 220 | + fn set_current_path(&mut self, _id: Self::PathId) {} |
| 221 | + fn push_tracked_path_component(&mut self, _component: &BStr) -> Self::PathId {} |
| 222 | + fn push_path_component(&mut self, _component: &BStr) {} |
| 223 | + fn pop_path_component(&mut self) {} |
| 224 | + fn visit_tree(&mut self, entry: &Entry<'_>) -> Action { |
| 225 | + self.entries += 1; |
| 226 | + let inserted = self.seen.insert(entry.oid.to_owned()); |
| 227 | + if !inserted { |
| 228 | + tree::visit::Action::Skip |
| 229 | + } else { |
| 230 | + tree::visit::Action::Continue |
| 231 | + } |
| 232 | + } |
| 233 | + fn visit_nontree(&mut self, entry: &Entry<'_>) -> Action { |
| 234 | + self.entries += 1; |
| 235 | + self.seen.insert(entry.oid.to_owned()); |
| 236 | + tree::visit::Action::Continue |
| 237 | + } |
202 | 238 | }
|
203 |
| - } |
204 |
| - fn visit_nontree(&mut self, entry: &Entry<'_>) -> Action { |
205 |
| - self.entries += 1; |
206 |
| - self.seen.insert(entry.oid.to_owned()); |
207 |
| - tree::visit::Action::Continue |
208 |
| - } |
209 |
| - } |
210 | 239 |
|
211 |
| - let mut cache = new_cache(); |
212 |
| - let mut buf = Vec::new(); |
213 |
| - let mut seen = HashSet::new(); |
214 |
| - let mut entries = 0; |
| 240 | + let mut cache = new_cache(); |
| 241 | + let mut buf = Vec::new(); |
| 242 | + let mut state = tree::breadthfirst::State::default(); |
| 243 | + let mut seen = HashSet::new(); |
| 244 | + let mut entries = 0; |
215 | 245 |
|
216 |
| - for commit in commits { |
217 |
| - let tid = db |
218 |
| - .find(commit, &mut buf, &mut cache)? |
219 |
| - .and_then(|o| o.into_commit_iter().and_then(|mut c| c.tree_id())) |
220 |
| - .expect("commit as starting point"); |
| 246 | + for commit in commits { |
| 247 | + let tid = db |
| 248 | + .find(commit, &mut buf, &mut cache)? |
| 249 | + .and_then(|o| o.into_commit_iter().and_then(|mut c| c.tree_id())) |
| 250 | + .expect("commit as starting point"); |
| 251 | + |
| 252 | + let mut count = Count { entries: 0, seen }; |
| 253 | + tree::breadthfirst::traverse( |
| 254 | + tid, |
| 255 | + &mut state, |
| 256 | + |oid, buf| { |
| 257 | + db.find(oid, buf, &mut cache) |
| 258 | + .ok() |
| 259 | + .flatten() |
| 260 | + .and_then(|o| o.into_tree_iter()) |
| 261 | + }, |
| 262 | + &mut count, |
| 263 | + )?; |
| 264 | + entries += count.entries as u64; |
| 265 | + seen = count.seen; |
| 266 | + } |
| 267 | + Ok((seen.len(), entries)) |
| 268 | + } |
| 269 | + Computation::MultiThreaded => { |
| 270 | + struct Count<'a> { |
| 271 | + entries: usize, |
| 272 | + seen: &'a DashSet<ObjectId>, |
| 273 | + } |
221 | 274 |
|
222 |
| - let mut count = Count { entries: 0, seen }; |
223 |
| - tree::breadthfirst::traverse( |
224 |
| - tid, |
225 |
| - tree::breadthfirst::State::default(), |
226 |
| - |oid, buf| { |
227 |
| - db.find(oid, buf, &mut cache) |
228 |
| - .ok() |
229 |
| - .flatten() |
230 |
| - .and_then(|o| o.into_tree_iter()) |
231 |
| - }, |
232 |
| - &mut count, |
233 |
| - )?; |
234 |
| - entries += count.entries as u64; |
235 |
| - seen = count.seen; |
| 275 | + impl<'a> tree::visit::Visit for Count<'a> { |
| 276 | + type PathId = (); |
| 277 | + fn set_current_path(&mut self, _id: Self::PathId) {} |
| 278 | + fn push_tracked_path_component(&mut self, _component: &BStr) -> Self::PathId {} |
| 279 | + fn push_path_component(&mut self, _component: &BStr) {} |
| 280 | + fn pop_path_component(&mut self) {} |
| 281 | + fn visit_tree(&mut self, entry: &Entry<'_>) -> Action { |
| 282 | + self.entries += 1; |
| 283 | + let inserted = self.seen.insert(entry.oid.to_owned()); |
| 284 | + if !inserted { |
| 285 | + tree::visit::Action::Skip |
| 286 | + } else { |
| 287 | + tree::visit::Action::Continue |
| 288 | + } |
| 289 | + } |
| 290 | + fn visit_nontree(&mut self, entry: &Entry<'_>) -> Action { |
| 291 | + self.entries += 1; |
| 292 | + self.seen.insert(entry.oid.to_owned()); |
| 293 | + tree::visit::Action::Continue |
| 294 | + } |
| 295 | + } |
| 296 | + use rayon::prelude::*; |
| 297 | + let seen = DashSet::new(); |
| 298 | + let entries = std::sync::atomic::AtomicU64::new(0); |
| 299 | + |
| 300 | + commits |
| 301 | + .into_par_iter() |
| 302 | + .try_for_each_init::<_, _, _, anyhow::Result<_>>( |
| 303 | + { |
| 304 | + let new_cache = &new_cache; |
| 305 | + let seen = &seen; |
| 306 | + move || { |
| 307 | + ( |
| 308 | + Count { |
| 309 | + entries: 0, |
| 310 | + seen: &seen, |
| 311 | + }, |
| 312 | + Vec::<u8>::new(), |
| 313 | + new_cache(), |
| 314 | + tree::breadthfirst::State::default(), |
| 315 | + ) |
| 316 | + } |
| 317 | + }, |
| 318 | + |(count, buf, cache, state), commit| { |
| 319 | + let tid = db |
| 320 | + .find(commit, buf, cache)? |
| 321 | + .and_then(|o| o.into_commit_iter().and_then(|mut c| c.tree_id())) |
| 322 | + .expect("commit as starting point"); |
| 323 | + tree::breadthfirst::traverse( |
| 324 | + tid, |
| 325 | + state, |
| 326 | + |oid, buf| db.find(oid, buf, cache).ok().flatten().and_then(|o| o.into_tree_iter()), |
| 327 | + count, |
| 328 | + )?; |
| 329 | + entries.fetch_add(count.entries as u64, std::sync::atomic::Ordering::Relaxed); |
| 330 | + Ok(()) |
| 331 | + }, |
| 332 | + )?; |
| 333 | + Ok((seen.len(), entries.load(std::sync::atomic::Ordering::Acquire))) |
| 334 | + } |
236 | 335 | }
|
237 |
| - Ok((seen.len(), entries)) |
238 | 336 | }
|
239 | 337 |
|
240 | 338 | fn do_libgit2_tree_dag_traversal(commits: &[ObjectId], db: &git2::Repository) -> anyhow::Result<(usize, u64)> {
|
|
0 commit comments