Skip to content

Commit 6830e60

Browse files
committed
refactor: the same, with much less!
1 parent b7a4d17 commit 6830e60

File tree

2 files changed

+48
-69
lines changed

2 files changed

+48
-69
lines changed

gitoxide-odb/src/pack/file/read.rs

Lines changed: 47 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,12 @@ use crate::{
44
};
55
use git_object::Id;
66
use smallvec::SmallVec;
7-
use std::{convert::TryInto, ops::Range};
7+
use std::convert::TryInto;
88

99
#[derive(Debug)]
1010
struct Delta {
11-
data: Range<usize>,
12-
header_size: usize,
13-
base_size: u64,
14-
result_size: u64,
11+
decompressed_size: usize,
12+
data_offset: u64,
1513
}
1614

1715
#[derive(Debug)]
@@ -20,12 +18,6 @@ pub enum ResolvedBase {
2018
OutOfPack { end: usize },
2119
}
2220

23-
impl Delta {
24-
fn size(&self) -> usize {
25-
self.data.end - self.data.start
26-
}
27-
}
28-
2921
/// Reading of objects
3022
impl File {
3123
// Note that this method does not resolve deltified objects, but merely decompresses their content
@@ -88,43 +80,40 @@ impl File {
8880
use crate::pack::decoded::Header;
8981
// all deltas, from the one that produces the desired object (first) to the oldest at the end of the chain
9082
let mut chain = SmallVec::<[Delta; 5]>::default();
91-
let mut instruction_buffer_size = 0usize;
9283
let mut cursor = last.clone();
93-
let mut base_buffer_end: Option<usize> = None;
84+
let mut base_buffer_size: Option<usize> = None;
9485
// Find the first full base, either an undeltified object in the pack or a reference to another object.
9586
while cursor.header.is_delta() {
96-
let end = instruction_buffer_size
97-
.checked_add(cursor.size as usize)
98-
.expect("no overflow");
9987
chain.push(Delta {
100-
data: Range {
101-
start: instruction_buffer_size,
102-
end,
103-
},
104-
header_size: 0,
105-
base_size: cursor.data_offset, // keep this value around for later
106-
result_size: 0,
88+
decompressed_size: cursor
89+
.size
90+
.try_into()
91+
.expect("delta sizes small enough to fit a usize"),
92+
data_offset: cursor.data_offset,
10793
});
108-
instruction_buffer_size = end;
10994
cursor = match cursor.header {
11095
Header::OfsDelta { pack_offset } => self.entry(pack_offset),
11196
Header::RefDelta { oid } => match resolve(&oid, out) {
11297
ResolvedBase::InPack(entry) => entry,
11398
ResolvedBase::OutOfPack { end } => {
114-
base_buffer_end = Some(end);
99+
base_buffer_size = Some(end);
115100
break;
116101
}
117102
},
118103
_ => unreachable!("cursor.is_delta() only allows deltas here"),
119104
};
120105
}
121106
let (first_buffer_end, second_buffer_end) = {
122-
let delta_instructions_size: u64 = chain.iter().map(|d| d.size() as u64).sum();
123-
let base_buffer_end = match base_buffer_end {
107+
let biggest_delta_instructions_size: u64 = chain
108+
.iter()
109+
.map(|d| d.decompressed_size as u64)
110+
.max()
111+
.expect("at least one delta");
112+
let base_buffer_size = match base_buffer_size {
124113
None => {
125114
let base_entry = cursor;
126115
out.resize(
127-
(base_entry.size * 2 + delta_instructions_size) // * 2 for worst-case guess
116+
(base_entry.size * 2 + biggest_delta_instructions_size) // * 2 for worst-case guess
128117
.try_into()
129118
.expect("usize to be big enough for all deltas"),
130119
0,
@@ -135,71 +124,61 @@ impl File {
135124
.try_into()
136125
.expect("usize big enough for single entry base object size")
137126
}
138-
Some(end) => {
127+
Some(size) => {
139128
out.resize(
140-
(end as u64 * 2 // * 2 for worst-case guess
141-
+ delta_instructions_size)
129+
(size as u64 * 2 // * 2 for worst-case guess
130+
+ biggest_delta_instructions_size)
142131
.try_into()
143132
.expect("usize to be big enough for all deltas"),
144133
0,
145134
);
146-
end
135+
size
147136
}
148137
};
149-
(base_buffer_end, base_buffer_end + base_buffer_end) // works because we have two equally sized sequential regions
138+
(base_buffer_size, base_buffer_size * 2) // works because we have two equally sized sequential regions
150139
};
151140

152-
// move the instructions offsets to a range where they won't be overwritten, past the second result buffer
153-
// conceptually, `out` is: [source-buffer][target-buffer][delta-1..delta-n]
154-
for delta in chain.iter_mut() {
155-
let data = Range {
156-
start: second_buffer_end + delta.data.start,
157-
end: second_buffer_end + delta.data.end,
158-
};
159-
let buf = &mut out[data];
160-
self.decompress_entry_inner(
161-
delta.base_size, // == entry.data_offset; we just use the slot to carry over necessary information
162-
buf,
163-
)?;
164-
let (base_size, consumed) = delta_header_size(buf);
165-
delta.header_size += consumed;
166-
delta.base_size = base_size;
167-
let (result_size, consumed) = delta_header_size(&buf[consumed..]);
168-
delta.header_size += consumed;
169-
delta.result_size = result_size;
170-
}
171-
172141
// From oldest to most recent, apply all deltas, swapping the buffer back and forth
173142
// TODO: once we have more tests, we could optimize this memory-intensive work to
174143
// analyse the delta-chains to only copy data once.
175144
let (buffers, instructions) = out.split_at_mut(second_buffer_end);
176145
let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
177146

178-
for Delta {
179-
data,
180-
header_size,
181-
base_size,
182-
result_size,
183-
} in chain.iter().rev()
147+
// `out` is: [source-buffer][target-buffer][delta-1..delta-n]
148+
let mut last_result_size = None;
149+
for (
150+
delta_idx,
151+
Delta {
152+
decompressed_size,
153+
data_offset: pack_offset,
154+
},
155+
) in chain.iter().rev().enumerate()
184156
{
157+
let data = &mut instructions[..*decompressed_size];
158+
self.decompress_entry_inner(*pack_offset, data)?;
159+
let mut offset = 0;
160+
let (base_size, consumed) = delta_header_size(data);
161+
offset += consumed;
162+
let (result_size, consumed) = delta_header_size(&data[consumed..]);
163+
offset += consumed;
164+
if delta_idx + 1 == chain.len() {
165+
last_result_size = Some(result_size);
166+
}
185167
apply_delta(
186-
&source_buf[..*base_size as usize],
187-
&mut target_buf[..*result_size as usize],
188-
&instructions[data.start + header_size..data.end],
168+
&source_buf[..base_size as usize],
169+
&mut target_buf[..result_size as usize],
170+
&data[offset..],
189171
);
190172
// use the target as source for the next delta
191173
std::mem::swap(&mut source_buf, &mut target_buf);
192174
}
193175

194-
let result_size = chain
195-
.first()
196-
.expect("at least one delta chain item")
197-
.result_size as usize;
176+
let last_result_size = last_result_size.expect("at least one delta chain item") as usize;
198177
// uneven chains leave the target buffer after the source buffer
199178
if chain.len() % 2 == 1 {
200-
source_buf[..result_size].copy_from_slice(&target_buf[..result_size]);
179+
source_buf[..last_result_size].copy_from_slice(&target_buf[..last_result_size]);
201180
}
202-
out.resize(result_size, 0);
181+
out.resize(last_result_size, 0);
203182
Ok(())
204183
}
205184
}

gitoxide-odb/tests/pack/file.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ mod decode_entry {
4141
fn blob_ofs_delta_two_links() {
4242
let buf = decode_entry_at_offset(3033);
4343
assert_eq!(buf.len(), 173, "buffer length is the actual object size");
44-
assert_eq!(buf.capacity(), 2381, "capacity is much higher as we allocate everything into a single, bigger, reusable buffer, which depends on base sizes");
44+
assert_eq!(buf.capacity(), 2375, "capacity is much higher as we allocate everything into a single, bigger, reusable buffer, which depends on base sizes");
4545
assert_eq!(buf.as_bstr(), b"GitPython is a python library used to interact with Git repositories.\n\nGitPython is a port of the grit library in Ruby created by \nTom Preston-Werner and Chris Wanstrath.\n\n\n".as_bstr());
4646
}
4747

0 commit comments

Comments
 (0)