Skip to content

Commit dceb47d

Browse files
authored
Merge pull request #1751 from cruessler/wip-changes-against-more-than-one-parent
Rework how blame is passed to parents
2 parents 1ca480a + cbf7f51 commit dceb47d

File tree

5 files changed

+497
-517
lines changed

5 files changed

+497
-517
lines changed

gix-blame/src/file/function.rs

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,10 @@ where
172172
if more_than_one_parent {
173173
// None of the changes affected the file we’re currently blaming.
174174
// Copy blame to parent.
175-
for unblamed_hunk in &mut hunks_to_blame {
176-
unblamed_hunk.clone_blame(suspect, parent_id);
177-
}
175+
hunks_to_blame = hunks_to_blame
176+
.into_iter()
177+
.map(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id))
178+
.collect();
178179
} else {
179180
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
180181
}
@@ -196,14 +197,56 @@ where
196197
}
197198
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
198199
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?;
199-
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
200-
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
200+
hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
201201
}
202202
}
203203
}
204-
if more_than_one_parent {
205-
for unblamed_hunk in &mut hunks_to_blame {
204+
205+
hunks_to_blame = hunks_to_blame
206+
.into_iter()
207+
.filter_map(|mut unblamed_hunk| {
208+
if unblamed_hunk.suspects.len() == 1 {
209+
if let Some(entry) = BlameEntry::from_unblamed_hunk(&unblamed_hunk, suspect) {
210+
// At this point, we have copied blame for every hunk to a parent. Hunks
211+
// that have only `suspect` left in `suspects` have not passed blame to any
212+
// parent and so they can be converted to a `BlameEntry` and moved to
213+
// `out`.
214+
out.push(entry);
215+
216+
return None;
217+
}
218+
}
219+
206220
unblamed_hunk.remove_blame(suspect);
221+
222+
Some(unblamed_hunk)
223+
})
224+
.collect();
225+
226+
// This block asserts that line ranges for each suspect never overlap. If they did overlap
227+
// this would mean that the same line in a *Source File* would map to more than one line in
228+
// the *Blamed File* and this is not possible.
229+
#[cfg(debug_assertions)]
230+
{
231+
let ranges = hunks_to_blame.iter().fold(
232+
std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
233+
|mut acc, hunk| {
234+
for (suspect, range) in hunk.suspects.clone() {
235+
acc.entry(suspect).or_default().push(range);
236+
}
237+
238+
acc
239+
},
240+
);
241+
242+
for (_, mut ranges) in ranges {
243+
ranges.sort_by(|a, b| a.start.cmp(&b.start));
244+
245+
for window in ranges.windows(2) {
246+
if let [a, b] = window {
247+
assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
248+
}
249+
}
207250
}
208251
}
209252
}

gix-blame/src/file/mod.rs

Lines changed: 37 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ pub(super) mod function;
1616
///
1717
/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*.
1818
fn process_change(
19-
out: &mut Vec<BlameEntry>,
2019
new_hunks_to_blame: &mut Vec<UnblamedHunk>,
2120
offset: &mut Offset,
2221
suspect: ObjectId,
22+
parent: ObjectId,
2323
hunk: Option<UnblamedHunk>,
2424
change: Option<Change>,
2525
) -> (Option<UnblamedHunk>, Option<Change>) {
@@ -40,6 +40,8 @@ fn process_change(
4040
match (hunk, change) {
4141
(Some(hunk), Some(Change::Unchanged(unchanged))) => {
4242
let Some(range_in_suspect) = hunk.suspects.get(&suspect) else {
43+
// We don’t clone blame to `parent` as `suspect` has nothing to do with this
44+
// `hunk`.
4345
new_hunks_to_blame.push(hunk);
4446
return (None, Some(Change::Unchanged(unchanged)));
4547
};
@@ -64,7 +66,7 @@ fn process_change(
6466

6567
// Nothing to do with `hunk` except shifting it,
6668
// but `unchanged` needs to be checked against the next hunk to catch up.
67-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
69+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
6870
(None, Some(Change::Unchanged(unchanged)))
6971
}
7072
(false, false) => {
@@ -93,7 +95,7 @@ fn process_change(
9395

9496
// Nothing to do with `hunk` except shifting it,
9597
// but `unchanged` needs to be checked against the next hunk to catch up.
96-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
98+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
9799
(None, Some(Change::Unchanged(unchanged)))
98100
}
99101
}
@@ -123,7 +125,7 @@ fn process_change(
123125
}
124126
Either::Right((before, after)) => {
125127
// requeue the left side `before` after offsetting it…
126-
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
128+
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
127129
// …and treat `after` as `new_hunk`, which contains the `added` range.
128130
after
129131
}
@@ -132,20 +134,18 @@ fn process_change(
132134
*offset += added.end - added.start;
133135
*offset -= number_of_lines_deleted;
134136

135-
// The overlapping `added` section was successfully located.
136-
out.push(BlameEntry::with_offset(
137-
added.clone(),
138-
suspect,
139-
hunk_starting_at_added.offset_for(suspect),
140-
));
141-
137+
// The overlapping `added` section was successfully located.
142138
// Re-split at the end of `added` to continue with what's after.
143139
match hunk_starting_at_added.split_at(suspect, added.end) {
144-
Either::Left(_) => {
140+
Either::Left(hunk) => {
141+
new_hunks_to_blame.push(hunk);
142+
145143
// Nothing to split, so we are done with this hunk.
146144
(None, None)
147145
}
148-
Either::Right((_, after)) => {
146+
Either::Right((hunk, after)) => {
147+
new_hunks_to_blame.push(hunk);
148+
149149
// Keep processing the unblamed range after `added`
150150
(Some(after), None)
151151
}
@@ -162,17 +162,13 @@ fn process_change(
162162
Either::Left(hunk) => hunk,
163163
Either::Right((before, after)) => {
164164
// Keep looking for the left side of the unblamed portion.
165-
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
165+
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
166166
after
167167
}
168168
};
169169

170170
// We can 'blame' the overlapping area of `added` and `hunk`.
171-
out.push(BlameEntry::with_offset(
172-
added.start..range_in_suspect.end,
173-
suspect,
174-
hunk_starting_at_added.offset_for(suspect),
175-
));
171+
new_hunks_to_blame.push(hunk_starting_at_added);
176172
// Keep processing `added`, it's portion past `hunk` may still contribute.
177173
(None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted)))
178174
}
@@ -183,18 +179,20 @@ fn process_change(
183179
// <---> (blamed)
184180
// <--> (new hunk)
185181

186-
out.push(BlameEntry::with_offset(
187-
range_in_suspect.start..added.end,
188-
suspect,
189-
hunk.offset_for(suspect),
190-
));
191-
192182
*offset += added.end - added.start;
193183
*offset -= number_of_lines_deleted;
194184

195185
match hunk.split_at(suspect, added.end) {
196-
Either::Left(_) => (None, None),
197-
Either::Right((_, after)) => (Some(after), None),
186+
Either::Left(hunk) => {
187+
new_hunks_to_blame.push(hunk);
188+
189+
(None, None)
190+
}
191+
Either::Right((before, after)) => {
192+
new_hunks_to_blame.push(before);
193+
194+
(Some(after), None)
195+
}
198196
}
199197
}
200198
(false, false) => {
@@ -222,7 +220,7 @@ fn process_change(
222220
// <----> (added)
223221

224222
// Retry `hunk` once there is overlapping changes to process.
225-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
223+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
226224

227225
// Let hunks catchup with this change.
228226
(
@@ -237,11 +235,7 @@ fn process_change(
237235
// <---> (blamed)
238236

239237
// Successfully blame the whole range.
240-
out.push(BlameEntry::with_offset(
241-
range_in_suspect.clone(),
242-
suspect,
243-
hunk.offset_for(suspect),
244-
));
238+
new_hunks_to_blame.push(hunk);
245239

246240
// And keep processing `added` with future `hunks` that might be affected by it.
247241
(
@@ -279,7 +273,7 @@ fn process_change(
279273
}
280274
Either::Right((before, after)) => {
281275
// `before` isn't affected by deletion, so keep it for later.
282-
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
276+
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
283277
// after will be affected by offset, and we will see if there are more changes affecting it.
284278
after
285279
}
@@ -291,7 +285,8 @@ fn process_change(
291285
// | (line_number_in_destination)
292286

293287
// Catchup with changes.
294-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
288+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
289+
295290
(
296291
None,
297292
Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)),
@@ -300,7 +295,7 @@ fn process_change(
300295
}
301296
(Some(hunk), None) => {
302297
// nothing to do - changes are exhausted, re-evaluate `hunk`.
303-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
298+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
304299
(None, None)
305300
}
306301
(None, Some(Change::Unchanged(_))) => {
@@ -328,10 +323,10 @@ fn process_change(
328323
/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
329324
/// Once a match is found, it's pushed onto `out`.
330325
fn process_changes(
331-
out: &mut Vec<BlameEntry>,
332326
hunks_to_blame: Vec<UnblamedHunk>,
333327
changes: Vec<Change>,
334328
suspect: ObjectId,
329+
parent: ObjectId,
335330
) -> Vec<UnblamedHunk> {
336331
let mut hunks_iter = hunks_to_blame.into_iter();
337332
let mut changes_iter = changes.into_iter();
@@ -344,10 +339,10 @@ fn process_changes(
344339

345340
loop {
346341
(hunk, change) = process_change(
347-
out,
348342
&mut new_hunks_to_blame,
349343
&mut offset_in_destination,
350344
suspect,
345+
parent,
351346
hunk,
352347
change,
353348
);
@@ -407,30 +402,20 @@ impl UnblamedHunk {
407402
}
408403
}
409404

410-
fn offset_for(&self, suspect: ObjectId) -> Offset {
411-
let range_in_suspect = self
412-
.suspects
413-
.get(&suspect)
414-
.expect("Internal and we know suspect is present");
415-
416-
if self.range_in_blamed_file.start > range_in_suspect.start {
417-
Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start)
418-
} else {
419-
Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start)
420-
}
421-
}
422-
423405
/// Transfer all ranges from the commit at `from` to the commit at `to`.
424406
fn pass_blame(&mut self, from: ObjectId, to: ObjectId) {
425407
if let Some(range_in_suspect) = self.suspects.remove(&from) {
426408
self.suspects.insert(to, range_in_suspect);
427409
}
428410
}
429411

430-
fn clone_blame(&mut self, from: ObjectId, to: ObjectId) {
412+
// TODO
413+
// Should this also accept `&mut self` as the other functions do?
414+
fn clone_blame(mut self, from: ObjectId, to: ObjectId) -> Self {
431415
if let Some(range_in_suspect) = self.suspects.get(&from) {
432416
self.suspects.insert(to, range_in_suspect.clone());
433417
}
418+
self
434419
}
435420

436421
fn remove_blame(&mut self, suspect: ObjectId) {
@@ -439,36 +424,6 @@ impl UnblamedHunk {
439424
}
440425

441426
impl BlameEntry {
442-
/// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_source_file`.
443-
fn with_offset(range_in_source_file: Range<u32>, commit_id: ObjectId, offset: Offset) -> Self {
444-
debug_assert!(
445-
range_in_source_file.end > range_in_source_file.start,
446-
"{range_in_source_file:?}"
447-
);
448-
449-
match offset {
450-
Offset::Added(added) => Self {
451-
start_in_blamed_file: range_in_source_file.start + added,
452-
start_in_source_file: range_in_source_file.start,
453-
len: force_non_zero(range_in_source_file.len() as u32),
454-
commit_id,
455-
},
456-
Offset::Deleted(deleted) => {
457-
debug_assert!(
458-
range_in_source_file.start >= deleted,
459-
"{range_in_source_file:?} {offset:?}"
460-
);
461-
462-
Self {
463-
start_in_blamed_file: range_in_source_file.start - deleted,
464-
start_in_source_file: range_in_source_file.start,
465-
len: force_non_zero(range_in_source_file.len() as u32),
466-
commit_id,
467-
}
468-
}
469-
}
470-
}
471-
472427
/// Create an offset from a portion of the *Blamed File*.
473428
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Option<Self> {
474429
let range_in_source_file = unblamed_hunk.suspects.get(&commit_id)?;

0 commit comments

Comments
 (0)