Skip to content

Commit ab22cb7

Browse files
committed
Use a standard HashMap instead of one based on SHA1
It's faster throughout the board. ``` ❯ cargo bench -p [email protected] --bench edit-tree Compiling gix-object v0.44.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-object) Compiling gix-pack v0.53.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-pack) Compiling gix-odb v0.63.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-odb) Finished `bench` profile [optimized] target(s) in 5.97s Running benches/edit_tree.rs (target/release/deps/edit_tree-6af6651a1c453a05) Gnuplot not found, using plotters backend editor/small tree (empty -> full -> empty) time: [2.5972 µs 2.6019 µs 2.6075 µs] thrpt: [3.8351 Melem/s 3.8434 Melem/s 3.8503 Melem/s] change: time: [-32.618% -32.355% -32.038%] (p = 0.00 < 0.05) thrpt: [+47.142% +47.831% +48.409%] Performance has improved. Found 14 outliers among 100 measurements (14.00%) 13 (13.00%) high mild 1 (1.00%) high severe editor/deeply nested tree (empty -> full -> empty) time: [8.2019 µs 8.2079 µs 8.2145 µs] thrpt: [5.5998 Melem/s 5.6043 Melem/s 5.6084 Melem/s] change: time: [-33.517% -33.377% -33.246%] (p = 0.00 < 0.05) thrpt: [+49.804% +50.099% +50.415%] Performance has improved. Found 13 outliers among 100 measurements (13.00%) 8 (8.00%) high mild 5 (5.00%) high severe cursor/small tree (empty -> full -> empty) time: [2.6911 µs 2.6935 µs 2.6961 µs] thrpt: [3.7090 Melem/s 3.7127 Melem/s 3.7160 Melem/s] change: time: [-33.881% -33.546% -33.225%] (p = 0.00 < 0.05) thrpt: [+49.757% +50.480% +51.242%] Performance has improved. Found 14 outliers among 100 measurements (14.00%) 4 (4.00%) high mild 10 (10.00%) high severe cursor/deeply nested tree (empty -> full -> empty) time: [1.3616 µs 1.3631 µs 1.3649 µs] thrpt: [33.703 Melem/s 33.747 Melem/s 33.783 Melem/s] change: time: [-40.063% -39.675% -39.234%] (p = 0.00 < 0.05) thrpt: [+64.566% +65.769% +66.843%] Performance has improved. Found 20 outliers among 100 measurements (20.00%) 18 (18.00%) high mild 2 (2.00%) high severe ```
1 parent 0d9868c commit ab22cb7

File tree

2 files changed

+13
-22
lines changed

2 files changed

+13
-22
lines changed

gix-object/src/tree/editor.rs

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ use crate::tree::{Editor, EntryKind};
22
use crate::{tree, Tree};
33
use bstr::{BStr, BString, ByteSlice, ByteVec};
44
use gix_hash::ObjectId;
5-
use gix_hashtable::hash_map::Entry;
65
use std::cmp::Ordering;
6+
use std::collections::{hash_map, HashMap};
77
use std::fmt::Formatter;
88

99
/// A way to constrain all [tree-edits](Editor) to a given subtree.
@@ -45,7 +45,7 @@ impl<'a> Editor<'a> {
4545
Editor {
4646
find,
4747
object_hash,
48-
trees: gix_hashtable::HashMap::from_iter(Some((empty_path_hash(), root))),
48+
trees: HashMap::from_iter(Some((empty_path(), root))),
4949
path_buf: Vec::with_capacity(256).into(),
5050
tree_buf: Vec::with_capacity(512),
5151
}
@@ -160,7 +160,7 @@ impl<'a> Editor<'a> {
160160
}
161161
Err(err) => {
162162
let root_tree = parents.into_iter().next().expect("root wasn't consumed yet");
163-
self.trees.insert(path_hash(&root_tree.1), root_tree.2);
163+
self.trees.insert(root_tree.1, root_tree.2);
164164
return Err(err);
165165
}
166166
}
@@ -179,11 +179,11 @@ impl<'a> Editor<'a> {
179179
}
180180
WriteMode::FromCursor => {}
181181
}
182-
self.trees.insert(path_hash(&rela_path), tree);
182+
self.trees.insert(rela_path, tree);
183183
return Ok(root_tree_id);
184184
}
185185
Err(err) => {
186-
self.trees.insert(path_hash(&rela_path), tree);
186+
self.trees.insert(rela_path, tree);
187187
return Err(err);
188188
}
189189
}
@@ -297,8 +297,8 @@ impl<'a> Editor<'a> {
297297
push_path_component(&mut self.path_buf, name);
298298
let path_id = path_hash(&self.path_buf);
299299
cursor = match self.trees.entry(path_id) {
300-
Entry::Occupied(e) => e.into_mut(),
301-
Entry::Vacant(e) => e.insert(
300+
hash_map::Entry::Occupied(e) => e.into_mut(),
301+
hash_map::Entry::Vacant(e) => e.insert(
302302
if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
303303
self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
304304
} else {
@@ -317,7 +317,7 @@ impl<'a> Editor<'a> {
317317
/// This is useful if the same editor is re-used for various trees.
318318
pub fn set_root(&mut self, root: Tree) -> &mut Self {
319319
self.trees.clear();
320-
self.trees.insert(empty_path_hash(), root);
320+
self.trees.insert(empty_path(), root);
321321
self
322322
}
323323
}
@@ -420,14 +420,12 @@ fn filename(path: &BStr) -> &BStr {
420420
path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..])
421421
}
422422

423-
fn empty_path_hash() -> ObjectId {
424-
gix_features::hash::hasher(gix_hash::Kind::Sha1).digest().into()
423+
fn empty_path() -> BString {
424+
BString::default()
425425
}
426426

427-
fn path_hash(path: &[u8]) -> ObjectId {
428-
let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1);
429-
hasher.update(path);
430-
hasher.digest().into()
427+
fn path_hash(path: &[u8]) -> BString {
428+
path.to_vec().into()
431429
}
432430

433431
fn push_path_component(base: &mut BString, component: &[u8]) -> usize {

gix-object/src/tree/mod.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use crate::{
22
bstr::{BStr, BString},
33
tree, Tree,
44
};
5-
use gix_hash::ObjectId;
65
use std::cmp::Ordering;
76

87
///
@@ -19,12 +18,6 @@ pub mod write;
1918
///
2019
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
2120
/// with some penalties.
22-
///
23-
/// ### Note
24-
///
25-
/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths
26-
/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into
27-
/// an unintended tree.
2821
#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
2922
#[derive(Clone)]
3023
pub struct Editor<'a> {
@@ -35,7 +28,7 @@ pub struct Editor<'a> {
3528
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
3629
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
3730
/// dropped when writing at the latest.
38-
trees: gix_hashtable::HashMap<ObjectId, Tree>,
31+
trees: std::collections::HashMap<BString, Tree>,
3932
/// A buffer to build up paths when finding the tree to edit.
4033
path_buf: BString,
4134
/// Our buffer for storing tree-data in, right before decoding it.

0 commit comments

Comments
 (0)