@@ -17,216 +17,14 @@ pub mod traverse;
17
17
///
18
18
pub mod from_offsets;
19
19
20
- /// An item stored within the [`Tree`] whose data is stored in a pack file, identified by
21
- /// the offset of its first (`offset`) and last (`next_offset`) bytes.
22
- ///
23
- /// It represents either a root entry, or one that relies on a base to be resolvable,
24
- /// alongside associated `data` `T`.
25
- pub struct Item < T > {
26
- /// The offset into the pack file at which the pack entry's data is located.
27
- pub offset : crate :: data:: Offset ,
28
- /// The offset of the next item in the pack file.
29
- pub next_offset : crate :: data:: Offset ,
30
- /// Data to store with each Item, effectively data associated with each entry in a pack.
31
- pub data : T ,
32
- /// Indices into our Tree's `items`, one for each pack entry that depends on us.
33
- ///
34
- /// Limited to u32 as that's the maximum amount of objects in a pack.
35
- // SAFETY INVARIANT:
36
- // - only one Item in a tree may have any given child index. `future_child_offsets`
37
- // should also not contain any indices found in `children`.\
38
- // - These indices should be in bounds for tree.child_items
39
- children : Vec < u32 > ,
40
- }
41
-
42
- /// Identify what kind of node we have last seen
43
- enum NodeKind {
44
- Root ,
45
- Child ,
46
- }
47
-
48
- /// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
49
- /// while being shareable among threads without a lock.
50
- /// It does this by making the guarantee that iteration only happens once.
51
- pub struct Tree < T > {
52
- /// The root nodes, i.e. base objects
53
- // SAFETY invariant: see Item.children
54
- root_items : Vec < Item < T > > ,
55
- /// The child nodes, i.e. those that rely a base object, like ref and ofs delta objects
56
- // SAFETY invariant: see Item.children
57
- child_items : Vec < Item < T > > ,
58
- /// The last encountered node was either a root or a child.
59
- last_seen : Option < NodeKind > ,
60
- /// Future child offsets, associating their offset into the pack with their index in the items array.
61
- /// (parent_offset, child_index)
62
- // SAFETY invariant:
63
- // - None of these child indices should already have parents
64
- // i.e. future_child_offsets[i].1 should never be also found
65
- // in Item.children. Indices should be found here at most once.
66
- // - These indices should be in bounds for tree.child_items.
67
- future_child_offsets : Vec < ( crate :: data:: Offset , usize ) > ,
68
- }
69
-
70
- impl < T > Tree < T > {
71
- /// Instantiate a empty tree capable of storing `num_objects` amounts of items.
72
- pub fn with_capacity ( num_objects : usize ) -> Result < Self , Error > {
73
- Ok ( Tree {
74
- root_items : Vec :: with_capacity ( num_objects / 2 ) ,
75
- child_items : Vec :: with_capacity ( num_objects / 2 ) ,
76
- last_seen : None ,
77
- future_child_offsets : Vec :: new ( ) ,
78
- } )
79
- }
80
-
81
- fn num_items ( & self ) -> usize {
82
- self . root_items . len ( ) + self . child_items . len ( )
83
- }
84
-
85
- fn assert_is_incrementing_and_update_next_offset ( & mut self , offset : crate :: data:: Offset ) -> Result < ( ) , Error > {
86
- let items = match & self . last_seen {
87
- Some ( NodeKind :: Root ) => & mut self . root_items ,
88
- Some ( NodeKind :: Child ) => & mut self . child_items ,
89
- None => return Ok ( ( ) ) ,
90
- } ;
91
- let item = & mut items. last_mut ( ) . expect ( "last seen won't lie" ) ;
92
- if offset <= item. offset {
93
- return Err ( Error :: InvariantIncreasingPackOffset {
94
- last_pack_offset : item. offset ,
95
- pack_offset : offset,
96
- } ) ;
97
- }
98
- item. next_offset = offset;
99
- Ok ( ( ) )
100
- }
101
-
102
- fn set_pack_entries_end_and_resolve_ref_offsets (
103
- & mut self ,
104
- pack_entries_end : crate :: data:: Offset ,
105
- ) -> Result < ( ) , traverse:: Error > {
106
- if !self . future_child_offsets . is_empty ( ) {
107
- for ( parent_offset, child_index) in self . future_child_offsets . drain ( ..) {
108
- // SAFETY invariants upheld:
109
- // - We are draining from future_child_offsets and adding to children, keeping things the same.
110
- // - We can rely on the `future_child_offsets` invariant to be sure that `children` is
111
- // not getting any indices that are already in use in `children` elsewhere
112
- // - The indices are in bounds for child_items since they were in bounds for future_child_offsets,
113
- // we can carry over the invariant.
114
- if let Ok ( i) = self . child_items . binary_search_by_key ( & parent_offset, |i| i. offset ) {
115
- self . child_items [ i] . children . push ( child_index as u32 ) ;
116
- } else if let Ok ( i) = self . root_items . binary_search_by_key ( & parent_offset, |i| i. offset ) {
117
- self . root_items [ i] . children . push ( child_index as u32 ) ;
118
- } else {
119
- return Err ( traverse:: Error :: OutOfPackRefDelta {
120
- base_pack_offset : parent_offset,
121
- } ) ;
122
- }
123
- }
124
- }
125
-
126
- self . assert_is_incrementing_and_update_next_offset ( pack_entries_end)
127
- . expect ( "BUG: pack now is smaller than all previously seen entries" ) ;
128
- Ok ( ( ) )
129
- }
20
+ /// Tree datastructure
21
+ // kept in separate module to encapsulate unsafety (it has field invariants)
22
+ mod tree;
130
23
131
- /// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate
132
- /// custom `data` with it.
133
- pub fn add_root ( & mut self , offset : crate :: data:: Offset , data : T ) -> Result < ( ) , Error > {
134
- self . assert_is_incrementing_and_update_next_offset ( offset) ?;
135
- self . last_seen = NodeKind :: Root . into ( ) ;
136
- self . root_items . push ( Item {
137
- offset,
138
- next_offset : 0 ,
139
- data,
140
- // SAFETY INVARIANT upheld: there are no children
141
- children : Default :: default ( ) ,
142
- } ) ;
143
- Ok ( ( ) )
144
- }
145
-
146
- /// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it.
147
- pub fn add_child (
148
- & mut self ,
149
- base_offset : crate :: data:: Offset ,
150
- offset : crate :: data:: Offset ,
151
- data : T ,
152
- ) -> Result < ( ) , Error > {
153
- self . assert_is_incrementing_and_update_next_offset ( offset) ?;
154
-
155
- let next_child_index = self . child_items . len ( ) ;
156
- // SAFETY INVARIANT upheld:
157
- // - This is one of two methods that modifies `children` and future_child_offsets. Out
158
- // of the two, it is the only one that produces new indices in the system.
159
- // - This always pushes next_child_index to *either* `children` or `future_child_offsets`,
160
- // maintaining the cross-field invariant there.
161
- // - This method will always push to child_items (at the end), incrementing
162
- // future values of next_child_index. This means next_child_index is always
163
- // unique for this method call.
164
- // - As the only method producing new indices, this is the only time
165
- // next_child_index will be added to children/future_child_offsets, upholding the invariant.
166
- // - Since next_child_index will always be a valid index by the end of this method,
167
- // this always produces valid in-bounds indices, upholding the bounds invariant.
168
-
169
- if let Ok ( i) = self . child_items . binary_search_by_key ( & base_offset, |i| i. offset ) {
170
- self . child_items [ i] . children . push ( next_child_index as u32 ) ;
171
- } else if let Ok ( i) = self . root_items . binary_search_by_key ( & base_offset, |i| i. offset ) {
172
- self . root_items [ i] . children . push ( next_child_index as u32 ) ;
173
- } else {
174
- self . future_child_offsets . push ( ( base_offset, next_child_index) ) ;
175
- }
176
-
177
- self . last_seen = NodeKind :: Child . into ( ) ;
178
- self . child_items . push ( Item {
179
- offset,
180
- next_offset : 0 ,
181
- data,
182
- // SAFETY INVARIANT upheld: there are no children
183
- children : Default :: default ( ) ,
184
- } ) ;
185
- Ok ( ( ) )
186
- }
187
- }
24
+ pub use tree:: { Item , Tree } ;
188
25
189
26
#[ cfg( test) ]
190
27
mod tests {
191
- mod tree {
192
- mod from_offsets_in_pack {
193
- use std:: sync:: atomic:: AtomicBool ;
194
-
195
- use crate as pack;
196
-
197
- const SMALL_PACK_INDEX : & str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx" ;
198
- const SMALL_PACK : & str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack" ;
199
-
200
- const INDEX_V1 : & str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx" ;
201
- const PACK_FOR_INDEX_V1 : & str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack" ;
202
-
203
- use gix_testtools:: fixture_path;
204
-
205
- #[ test]
206
- fn v1 ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
207
- tree ( INDEX_V1 , PACK_FOR_INDEX_V1 )
208
- }
209
-
210
- #[ test]
211
- fn v2 ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
212
- tree ( SMALL_PACK_INDEX , SMALL_PACK )
213
- }
214
-
215
- fn tree ( index_path : & str , pack_path : & str ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
216
- let idx = pack:: index:: File :: at ( fixture_path ( index_path) , gix_hash:: Kind :: Sha1 ) ?;
217
- crate :: cache:: delta:: Tree :: from_offsets_in_pack (
218
- & fixture_path ( pack_path) ,
219
- idx. sorted_offsets ( ) . into_iter ( ) ,
220
- & |ofs| * ofs,
221
- & |id| idx. lookup ( id) . map ( |index| idx. pack_offset_at_index ( index) ) ,
222
- & mut gix_features:: progress:: Discard ,
223
- & AtomicBool :: new ( false ) ,
224
- gix_hash:: Kind :: Sha1 ,
225
- ) ?;
226
- Ok ( ( ) )
227
- }
228
- }
229
- }
230
28
231
29
#[ test]
232
30
fn size_of_pack_tree_item ( ) {
0 commit comments