Skip to content

Commit c85a981

Browse files
committed
Put Tree in its own module
This means only the code in tree.rs has the ability to violate the relevant invariants
1 parent 6587eea commit c85a981

File tree

4 files changed

+261
-212
lines changed

4 files changed

+261
-212
lines changed

gix-pack/src/cache/delta/mod.rs

Lines changed: 4 additions & 206 deletions
Original file line numberDiff line numberDiff line change
@@ -17,216 +17,14 @@ pub mod traverse;
1717
///
1818
pub mod from_offsets;
1919

20-
/// An item stored within the [`Tree`] whose data is stored in a pack file, identified by
21-
/// the offset of its first (`offset`) and last (`next_offset`) bytes.
22-
///
23-
/// It represents either a root entry, or one that relies on a base to be resolvable,
24-
/// alongside associated `data` `T`.
25-
pub struct Item<T> {
26-
/// The offset into the pack file at which the pack entry's data is located.
27-
pub offset: crate::data::Offset,
28-
/// The offset of the next item in the pack file.
29-
pub next_offset: crate::data::Offset,
30-
/// Data to store with each Item, effectively data associated with each entry in a pack.
31-
pub data: T,
32-
/// Indices into our Tree's `items`, one for each pack entry that depends on us.
33-
///
34-
/// Limited to u32 as that's the maximum amount of objects in a pack.
35-
// SAFETY INVARIANT:
36-
// - only one Item in a tree may have any given child index. `future_child_offsets`
37-
// should also not contain any indices found in `children`.\
38-
// - These indices should be in bounds for tree.child_items
39-
children: Vec<u32>,
40-
}
41-
42-
/// Identify what kind of node we have last seen
43-
enum NodeKind {
44-
Root,
45-
Child,
46-
}
47-
48-
/// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
49-
/// while being shareable among threads without a lock.
50-
/// It does this by making the guarantee that iteration only happens once.
51-
pub struct Tree<T> {
52-
/// The root nodes, i.e. base objects
53-
// SAFETY invariant: see Item.children
54-
root_items: Vec<Item<T>>,
55-
/// The child nodes, i.e. those that rely a base object, like ref and ofs delta objects
56-
// SAFETY invariant: see Item.children
57-
child_items: Vec<Item<T>>,
58-
/// The last encountered node was either a root or a child.
59-
last_seen: Option<NodeKind>,
60-
/// Future child offsets, associating their offset into the pack with their index in the items array.
61-
/// (parent_offset, child_index)
62-
// SAFETY invariant:
63-
// - None of these child indices should already have parents
64-
// i.e. future_child_offsets[i].1 should never be also found
65-
// in Item.children. Indices should be found here at most once.
66-
// - These indices should be in bounds for tree.child_items.
67-
future_child_offsets: Vec<(crate::data::Offset, usize)>,
68-
}
69-
70-
impl<T> Tree<T> {
71-
/// Instantiate a empty tree capable of storing `num_objects` amounts of items.
72-
pub fn with_capacity(num_objects: usize) -> Result<Self, Error> {
73-
Ok(Tree {
74-
root_items: Vec::with_capacity(num_objects / 2),
75-
child_items: Vec::with_capacity(num_objects / 2),
76-
last_seen: None,
77-
future_child_offsets: Vec::new(),
78-
})
79-
}
80-
81-
fn num_items(&self) -> usize {
82-
self.root_items.len() + self.child_items.len()
83-
}
84-
85-
fn assert_is_incrementing_and_update_next_offset(&mut self, offset: crate::data::Offset) -> Result<(), Error> {
86-
let items = match &self.last_seen {
87-
Some(NodeKind::Root) => &mut self.root_items,
88-
Some(NodeKind::Child) => &mut self.child_items,
89-
None => return Ok(()),
90-
};
91-
let item = &mut items.last_mut().expect("last seen won't lie");
92-
if offset <= item.offset {
93-
return Err(Error::InvariantIncreasingPackOffset {
94-
last_pack_offset: item.offset,
95-
pack_offset: offset,
96-
});
97-
}
98-
item.next_offset = offset;
99-
Ok(())
100-
}
101-
102-
fn set_pack_entries_end_and_resolve_ref_offsets(
103-
&mut self,
104-
pack_entries_end: crate::data::Offset,
105-
) -> Result<(), traverse::Error> {
106-
if !self.future_child_offsets.is_empty() {
107-
for (parent_offset, child_index) in self.future_child_offsets.drain(..) {
108-
// SAFETY invariants upheld:
109-
// - We are draining from future_child_offsets and adding to children, keeping things the same.
110-
// - We can rely on the `future_child_offsets` invariant to be sure that `children` is
111-
// not getting any indices that are already in use in `children` elsewhere
112-
// - The indices are in bounds for child_items since they were in bounds for future_child_offsets,
113-
// we can carry over the invariant.
114-
if let Ok(i) = self.child_items.binary_search_by_key(&parent_offset, |i| i.offset) {
115-
self.child_items[i].children.push(child_index as u32);
116-
} else if let Ok(i) = self.root_items.binary_search_by_key(&parent_offset, |i| i.offset) {
117-
self.root_items[i].children.push(child_index as u32);
118-
} else {
119-
return Err(traverse::Error::OutOfPackRefDelta {
120-
base_pack_offset: parent_offset,
121-
});
122-
}
123-
}
124-
}
125-
126-
self.assert_is_incrementing_and_update_next_offset(pack_entries_end)
127-
.expect("BUG: pack now is smaller than all previously seen entries");
128-
Ok(())
129-
}
20+
/// Tree datastructure
21+
// kept in separate module to encapsulate unsafety (it has field invariants)
22+
mod tree;
13023

131-
/// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate
132-
/// custom `data` with it.
133-
pub fn add_root(&mut self, offset: crate::data::Offset, data: T) -> Result<(), Error> {
134-
self.assert_is_incrementing_and_update_next_offset(offset)?;
135-
self.last_seen = NodeKind::Root.into();
136-
self.root_items.push(Item {
137-
offset,
138-
next_offset: 0,
139-
data,
140-
// SAFETY INVARIANT upheld: there are no children
141-
children: Default::default(),
142-
});
143-
Ok(())
144-
}
145-
146-
/// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it.
147-
pub fn add_child(
148-
&mut self,
149-
base_offset: crate::data::Offset,
150-
offset: crate::data::Offset,
151-
data: T,
152-
) -> Result<(), Error> {
153-
self.assert_is_incrementing_and_update_next_offset(offset)?;
154-
155-
let next_child_index = self.child_items.len();
156-
// SAFETY INVARIANT upheld:
157-
// - This is one of two methods that modifies `children` and future_child_offsets. Out
158-
// of the two, it is the only one that produces new indices in the system.
159-
// - This always pushes next_child_index to *either* `children` or `future_child_offsets`,
160-
// maintaining the cross-field invariant there.
161-
// - This method will always push to child_items (at the end), incrementing
162-
// future values of next_child_index. This means next_child_index is always
163-
// unique for this method call.
164-
// - As the only method producing new indices, this is the only time
165-
// next_child_index will be added to children/future_child_offsets, upholding the invariant.
166-
// - Since next_child_index will always be a valid index by the end of this method,
167-
// this always produces valid in-bounds indices, upholding the bounds invariant.
168-
169-
if let Ok(i) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) {
170-
self.child_items[i].children.push(next_child_index as u32);
171-
} else if let Ok(i) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) {
172-
self.root_items[i].children.push(next_child_index as u32);
173-
} else {
174-
self.future_child_offsets.push((base_offset, next_child_index));
175-
}
176-
177-
self.last_seen = NodeKind::Child.into();
178-
self.child_items.push(Item {
179-
offset,
180-
next_offset: 0,
181-
data,
182-
// SAFETY INVARIANT upheld: there are no children
183-
children: Default::default(),
184-
});
185-
Ok(())
186-
}
187-
}
24+
pub use tree::{Item, Tree};
18825

18926
#[cfg(test)]
19027
mod tests {
191-
mod tree {
192-
mod from_offsets_in_pack {
193-
use std::sync::atomic::AtomicBool;
194-
195-
use crate as pack;
196-
197-
const SMALL_PACK_INDEX: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx";
198-
const SMALL_PACK: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack";
199-
200-
const INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx";
201-
const PACK_FOR_INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack";
202-
203-
use gix_testtools::fixture_path;
204-
205-
#[test]
206-
fn v1() -> Result<(), Box<dyn std::error::Error>> {
207-
tree(INDEX_V1, PACK_FOR_INDEX_V1)
208-
}
209-
210-
#[test]
211-
fn v2() -> Result<(), Box<dyn std::error::Error>> {
212-
tree(SMALL_PACK_INDEX, SMALL_PACK)
213-
}
214-
215-
fn tree(index_path: &str, pack_path: &str) -> Result<(), Box<dyn std::error::Error>> {
216-
let idx = pack::index::File::at(fixture_path(index_path), gix_hash::Kind::Sha1)?;
217-
crate::cache::delta::Tree::from_offsets_in_pack(
218-
&fixture_path(pack_path),
219-
idx.sorted_offsets().into_iter(),
220-
&|ofs| *ofs,
221-
&|id| idx.lookup(id).map(|index| idx.pack_offset_at_index(index)),
222-
&mut gix_features::progress::Discard,
223-
&AtomicBool::new(false),
224-
gix_hash::Kind::Sha1,
225-
)?;
226-
Ok(())
227-
}
228-
}
229-
}
23028

23129
#[test]
23230
fn size_of_pack_tree_item() {

gix-pack/src/cache/delta/traverse/mod.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,11 @@ where
132132
let object_progress = OwnShared::new(Mutable::new(object_progress));
133133

134134
let start = std::time::Instant::now();
135-
let child_items = ItemSliceSync::new(&mut self.child_items);
135+
let (mut root_items, mut child_items_vec) = self.take_root_and_child();
136+
let child_items = ItemSliceSync::new(&mut child_items_vec);
136137
let child_items = &child_items;
137138
in_parallel_with_slice(
138-
&mut self.root_items,
139+
&mut root_items,
139140
thread_limit,
140141
{
141142
{
@@ -180,8 +181,8 @@ where
180181
size_progress.show_throughput(start);
181182

182183
Ok(Outcome {
183-
roots: self.root_items,
184-
children: self.child_items,
184+
roots: root_items,
185+
children: child_items_vec,
185186
})
186187
}
187188
}

gix-pack/src/cache/delta/traverse/resolve.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ mod root {
6060

6161
/// Returns true if this node has children, e.g. is not a leaf in the tree.
6262
pub fn has_children(&self) -> bool {
63-
!self.item.children.is_empty()
63+
!self.item.children().is_empty()
6464
}
6565

6666
/// Transform this `Node` into an iterator over its children.
@@ -69,7 +69,7 @@ mod root {
6969
pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
7070
let children = self.child_items;
7171
#[allow(unsafe_code)]
72-
self.item.children.iter().map(move |&index| {
72+
self.item.children().iter().map(move |&index| {
7373
// SAFETY: Due to the invariant on new(), we can rely on these indices
7474
// being unique.
7575
let item = unsafe { children.get_mut(index as usize) };

0 commit comments

Comments
 (0)