Skip to content

Commit c8c1b63

Browse files
committed
Rework how blame is passed to parents
Before this commit, blame would have been passed to the first parent in cases where there was more than one. This commit changes that by only removing a particular suspect from further consideration once it has been compared to all of its parents. For hunks where blame cannot be passed to any parent, we can safely assume that they were introduced by a particular suspect, so we remove those hunks from `hunks_to_blame` and create a `BlameEntry` out of them. We can illustrate the change using the following small example history: ```text ---1----2 \ \ 3----4--- ``` Let’s now say that we’re blaming a file that has the following content: ```text line 1 # introduced by (1), then changed by (3) line 2 # introduced by (1) line 3 # introduced by (1), then changed by (2) ``` The resulting blame should look like this: ```text (3) line 1 (1) line 2 (2) line 3 ``` The previous version of the algorithm would have passed blame to just (2) or (3), depending on which one came first in the list of parents.
1 parent 8d84818 commit c8c1b63

File tree

5 files changed

+500
-517
lines changed

5 files changed

+500
-517
lines changed

gix-blame/src/file/function.rs

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,10 @@ where
172172
if more_than_one_parent {
173173
// None of the changes affected the file we’re currently blaming.
174174
// Copy blame to parent.
175-
for unblamed_hunk in &mut hunks_to_blame {
176-
unblamed_hunk.clone_blame(suspect, parent_id);
177-
}
175+
hunks_to_blame = hunks_to_blame
176+
.into_iter()
177+
.map(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id))
178+
.collect();
178179
} else {
179180
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
180181
}
@@ -196,14 +197,59 @@ where
196197
}
197198
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
198199
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?;
199-
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
200-
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
200+
hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
201201
}
202202
}
203203
}
204-
if more_than_one_parent {
205-
for unblamed_hunk in &mut hunks_to_blame {
204+
205+
hunks_to_blame = hunks_to_blame
206+
.into_iter()
207+
.filter_map(|mut unblamed_hunk| {
208+
if unblamed_hunk.suspects.len() == 1 {
209+
if let Some(entry) = BlameEntry::from_unblamed_hunk(&unblamed_hunk, suspect) {
210+
// At this point, we have copied blame for every hunk to a parent. Hunks
211+
// that have only `suspect` left in `suspects` have not passed blame to any
212+
// parent and so they can be converted to a `BlameEntry` and moved to
213+
// `out`.
214+
out.push(entry);
215+
216+
return None;
217+
}
218+
}
219+
206220
unblamed_hunk.remove_blame(suspect);
221+
222+
Some(unblamed_hunk)
223+
})
224+
.collect();
225+
226+
// This block asserts that line ranges for each suspect never overlap. If they did overlap
227+
// this would mean that the same line in a *Source File* would map to more than one line in
228+
// the *Blamed File* and this is not possible.
229+
#[cfg(debug_assertions)]
230+
{
231+
let ranges = hunks_to_blame.iter().fold(
232+
std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
233+
|mut acc, hunk| {
234+
for (suspect, range) in hunk.suspects.clone() {
235+
acc.entry(suspect).or_insert(Vec::new()).push(range);
236+
}
237+
238+
acc
239+
},
240+
);
241+
242+
for (_, mut ranges) in ranges {
243+
ranges.sort_by(|a, b| a.start.cmp(&b.start));
244+
245+
for window in ranges.windows(2) {
246+
match window {
247+
[a, b] => {
248+
assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
249+
}
250+
_ => {}
251+
}
252+
}
207253
}
208254
}
209255
}

gix-blame/src/file/mod.rs

Lines changed: 37 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ pub(super) mod function;
1616
///
1717
/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*.
1818
fn process_change(
19-
out: &mut Vec<BlameEntry>,
2019
new_hunks_to_blame: &mut Vec<UnblamedHunk>,
2120
offset: &mut Offset,
2221
suspect: ObjectId,
22+
parent: ObjectId,
2323
hunk: Option<UnblamedHunk>,
2424
change: Option<Change>,
2525
) -> (Option<UnblamedHunk>, Option<Change>) {
@@ -40,6 +40,8 @@ fn process_change(
4040
match (hunk, change) {
4141
(Some(hunk), Some(Change::Unchanged(unchanged))) => {
4242
let Some(range_in_suspect) = hunk.suspects.get(&suspect) else {
43+
// We don’t clone blame to `parent` as `suspect` has nothing to do with this
44+
// `hunk`.
4345
new_hunks_to_blame.push(hunk);
4446
return (None, Some(Change::Unchanged(unchanged)));
4547
};
@@ -64,7 +66,7 @@ fn process_change(
6466

6567
// Nothing to do with `hunk` except shifting it,
6668
// but `unchanged` needs to be checked against the next hunk to catch up.
67-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
69+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
6870
(None, Some(Change::Unchanged(unchanged)))
6971
}
7072
(false, false) => {
@@ -93,7 +95,7 @@ fn process_change(
9395

9496
// Nothing to do with `hunk` except shifting it,
9597
// but `unchanged` needs to be checked against the next hunk to catch up.
96-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
98+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
9799
(None, Some(Change::Unchanged(unchanged)))
98100
}
99101
}
@@ -123,7 +125,7 @@ fn process_change(
123125
}
124126
Either::Right((before, after)) => {
125127
// requeue the left side `before` after offsetting it…
126-
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
128+
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
127129
// …and treat `after` as `new_hunk`, which contains the `added` range.
128130
after
129131
}
@@ -132,20 +134,18 @@ fn process_change(
132134
*offset += added.end - added.start;
133135
*offset -= number_of_lines_deleted;
134136

135-
// The overlapping `added` section was successfully located.
136-
out.push(BlameEntry::with_offset(
137-
added.clone(),
138-
suspect,
139-
hunk_starting_at_added.offset_for(suspect),
140-
));
141-
137+
// The overlapping `added` section was successfully located.
142138
// Re-split at the end of `added` to continue with what's after.
143139
match hunk_starting_at_added.split_at(suspect, added.end) {
144-
Either::Left(_) => {
140+
Either::Left(hunk) => {
141+
new_hunks_to_blame.push(hunk);
142+
145143
// Nothing to split, so we are done with this hunk.
146144
(None, None)
147145
}
148-
Either::Right((_, after)) => {
146+
Either::Right((hunk, after)) => {
147+
new_hunks_to_blame.push(hunk);
148+
149149
// Keep processing the unblamed range after `added`
150150
(Some(after), None)
151151
}
@@ -162,17 +162,13 @@ fn process_change(
162162
Either::Left(hunk) => hunk,
163163
Either::Right((before, after)) => {
164164
// Keep looking for the left side of the unblamed portion.
165-
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
165+
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
166166
after
167167
}
168168
};
169169

170170
// We can 'blame' the overlapping area of `added` and `hunk`.
171-
out.push(BlameEntry::with_offset(
172-
added.start..range_in_suspect.end,
173-
suspect,
174-
hunk_starting_at_added.offset_for(suspect),
175-
));
171+
new_hunks_to_blame.push(hunk_starting_at_added);
176172
// Keep processing `added`, it's portion past `hunk` may still contribute.
177173
(None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted)))
178174
}
@@ -183,18 +179,20 @@ fn process_change(
183179
// <---> (blamed)
184180
// <--> (new hunk)
185181

186-
out.push(BlameEntry::with_offset(
187-
range_in_suspect.start..added.end,
188-
suspect,
189-
hunk.offset_for(suspect),
190-
));
191-
192182
*offset += added.end - added.start;
193183
*offset -= number_of_lines_deleted;
194184

195185
match hunk.split_at(suspect, added.end) {
196-
Either::Left(_) => (None, None),
197-
Either::Right((_, after)) => (Some(after), None),
186+
Either::Left(hunk) => {
187+
new_hunks_to_blame.push(hunk);
188+
189+
(None, None)
190+
}
191+
Either::Right((before, after)) => {
192+
new_hunks_to_blame.push(before);
193+
194+
(Some(after), None)
195+
}
198196
}
199197
}
200198
(false, false) => {
@@ -222,7 +220,7 @@ fn process_change(
222220
// <----> (added)
223221

224222
// Retry `hunk` once there is overlapping changes to process.
225-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
223+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
226224

227225
// Let hunks catchup with this change.
228226
(
@@ -237,11 +235,7 @@ fn process_change(
237235
// <---> (blamed)
238236

239237
// Successfully blame the whole range.
240-
out.push(BlameEntry::with_offset(
241-
range_in_suspect.clone(),
242-
suspect,
243-
hunk.offset_for(suspect),
244-
));
238+
new_hunks_to_blame.push(hunk);
245239

246240
// And keep processing `added` with future `hunks` that might be affected by it.
247241
(
@@ -279,7 +273,7 @@ fn process_change(
279273
}
280274
Either::Right((before, after)) => {
281275
// `before` isn't affected by deletion, so keep it for later.
282-
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
276+
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
283277
// after will be affected by offset, and we will see if there are more changes affecting it.
284278
after
285279
}
@@ -291,7 +285,8 @@ fn process_change(
291285
// | (line_number_in_destination)
292286

293287
// Catchup with changes.
294-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
288+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
289+
295290
(
296291
None,
297292
Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)),
@@ -300,7 +295,7 @@ fn process_change(
300295
}
301296
(Some(hunk), None) => {
302297
// nothing to do - changes are exhausted, re-evaluate `hunk`.
303-
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
298+
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
304299
(None, None)
305300
}
306301
(None, Some(Change::Unchanged(_))) => {
@@ -328,10 +323,10 @@ fn process_change(
328323
/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
329324
/// Once a match is found, it's pushed onto `out`.
330325
fn process_changes(
331-
out: &mut Vec<BlameEntry>,
332326
hunks_to_blame: Vec<UnblamedHunk>,
333327
changes: Vec<Change>,
334328
suspect: ObjectId,
329+
parent: ObjectId,
335330
) -> Vec<UnblamedHunk> {
336331
let mut hunks_iter = hunks_to_blame.into_iter();
337332
let mut changes_iter = changes.into_iter();
@@ -344,10 +339,10 @@ fn process_changes(
344339

345340
loop {
346341
(hunk, change) = process_change(
347-
out,
348342
&mut new_hunks_to_blame,
349343
&mut offset_in_destination,
350344
suspect,
345+
parent,
351346
hunk,
352347
change,
353348
);
@@ -407,30 +402,20 @@ impl UnblamedHunk {
407402
}
408403
}
409404

410-
fn offset_for(&self, suspect: ObjectId) -> Offset {
411-
let range_in_suspect = self
412-
.suspects
413-
.get(&suspect)
414-
.expect("Internal and we know suspect is present");
415-
416-
if self.range_in_blamed_file.start > range_in_suspect.start {
417-
Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start)
418-
} else {
419-
Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start)
420-
}
421-
}
422-
423405
/// Transfer all ranges from the commit at `from` to the commit at `to`.
424406
fn pass_blame(&mut self, from: ObjectId, to: ObjectId) {
425407
if let Some(range_in_suspect) = self.suspects.remove(&from) {
426408
self.suspects.insert(to, range_in_suspect);
427409
}
428410
}
429411

430-
fn clone_blame(&mut self, from: ObjectId, to: ObjectId) {
412+
// TODO
413+
// Should this also accept `&mut self` as the other functions do?
414+
fn clone_blame(mut self, from: ObjectId, to: ObjectId) -> Self {
431415
if let Some(range_in_suspect) = self.suspects.get(&from) {
432416
self.suspects.insert(to, range_in_suspect.clone());
433417
}
418+
self
434419
}
435420

436421
fn remove_blame(&mut self, suspect: ObjectId) {
@@ -439,36 +424,6 @@ impl UnblamedHunk {
439424
}
440425

441426
impl BlameEntry {
442-
/// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_source_file`.
443-
fn with_offset(range_in_source_file: Range<u32>, commit_id: ObjectId, offset: Offset) -> Self {
444-
debug_assert!(
445-
range_in_source_file.end > range_in_source_file.start,
446-
"{range_in_source_file:?}"
447-
);
448-
449-
match offset {
450-
Offset::Added(added) => Self {
451-
start_in_blamed_file: range_in_source_file.start + added,
452-
start_in_source_file: range_in_source_file.start,
453-
len: force_non_zero(range_in_source_file.len() as u32),
454-
commit_id,
455-
},
456-
Offset::Deleted(deleted) => {
457-
debug_assert!(
458-
range_in_source_file.start >= deleted,
459-
"{range_in_source_file:?} {offset:?}"
460-
);
461-
462-
Self {
463-
start_in_blamed_file: range_in_source_file.start - deleted,
464-
start_in_source_file: range_in_source_file.start,
465-
len: force_non_zero(range_in_source_file.len() as u32),
466-
commit_id,
467-
}
468-
}
469-
}
470-
}
471-
472427
/// Create an offset from a portion of the *Blamed File*.
473428
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Option<Self> {
474429
let range_in_source_file = unblamed_hunk.suspects.get(&commit_id)?;

0 commit comments

Comments
 (0)