Skip to content

Rework how blame is passed to parents #1751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 50 additions & 7 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,10 @@ where
if more_than_one_parent {
// None of the changes affected the file we’re currently blaming.
// Copy blame to parent.
for unblamed_hunk in &mut hunks_to_blame {
unblamed_hunk.clone_blame(suspect, parent_id);
}
hunks_to_blame = hunks_to_blame
.into_iter()
.map(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id))
.collect();
} else {
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
}
Expand All @@ -196,14 +197,56 @@ where
}
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?;
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
}
}
}
if more_than_one_parent {
for unblamed_hunk in &mut hunks_to_blame {

hunks_to_blame = hunks_to_blame
.into_iter()
.filter_map(|mut unblamed_hunk| {
if unblamed_hunk.suspects.len() == 1 {
if let Some(entry) = BlameEntry::from_unblamed_hunk(&unblamed_hunk, suspect) {
// At this point, we have copied blame for every hunk to a parent. Hunks
// that have only `suspect` left in `suspects` have not passed blame to any
// parent and so they can be converted to a `BlameEntry` and moved to
// `out`.
out.push(entry);

return None;
}
}

unblamed_hunk.remove_blame(suspect);

Some(unblamed_hunk)
})
.collect();

// This block asserts that line ranges for each suspect never overlap. If they did overlap
// this would mean that the same line in a *Source File* would map to more than one line in
// the *Blamed File* and this is not possible.
#[cfg(debug_assertions)]
{
let ranges = hunks_to_blame.iter().fold(
std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
|mut acc, hunk| {
for (suspect, range) in hunk.suspects.clone() {
acc.entry(suspect).or_default().push(range);
}

acc
},
);

for (_, mut ranges) in ranges {
ranges.sort_by(|a, b| a.start.cmp(&b.start));

for window in ranges.windows(2) {
if let [a, b] = window {
assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
}
}
}
}
}
Expand Down
119 changes: 37 additions & 82 deletions gix-blame/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ pub(super) mod function;
///
/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*.
fn process_change(
out: &mut Vec<BlameEntry>,
new_hunks_to_blame: &mut Vec<UnblamedHunk>,
offset: &mut Offset,
suspect: ObjectId,
parent: ObjectId,
hunk: Option<UnblamedHunk>,
change: Option<Change>,
) -> (Option<UnblamedHunk>, Option<Change>) {
Expand All @@ -40,6 +40,8 @@ fn process_change(
match (hunk, change) {
(Some(hunk), Some(Change::Unchanged(unchanged))) => {
let Some(range_in_suspect) = hunk.suspects.get(&suspect) else {
// We don’t clone blame to `parent` as `suspect` has nothing to do with this
// `hunk`.
new_hunks_to_blame.push(hunk);
return (None, Some(Change::Unchanged(unchanged)));
};
Expand All @@ -64,7 +66,7 @@ fn process_change(

// Nothing to do with `hunk` except shifting it,
// but `unchanged` needs to be checked against the next hunk to catch up.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, Some(Change::Unchanged(unchanged)))
}
(false, false) => {
Expand Down Expand Up @@ -93,7 +95,7 @@ fn process_change(

// Nothing to do with `hunk` except shifting it,
// but `unchanged` needs to be checked against the next hunk to catch up.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, Some(Change::Unchanged(unchanged)))
}
}
Expand Down Expand Up @@ -123,7 +125,7 @@ fn process_change(
}
Either::Right((before, after)) => {
// requeue the left side `before` after offsetting it…
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
// …and treat `after` as `new_hunk`, which contains the `added` range.
after
}
Expand All @@ -132,20 +134,18 @@ fn process_change(
*offset += added.end - added.start;
*offset -= number_of_lines_deleted;

// The overlapping `added` section was successfully located.
out.push(BlameEntry::with_offset(
added.clone(),
suspect,
hunk_starting_at_added.offset_for(suspect),
));

// The overlapping `added` section was successfully located.
// Re-split at the end of `added` to continue with what's after.
match hunk_starting_at_added.split_at(suspect, added.end) {
Either::Left(_) => {
Either::Left(hunk) => {
new_hunks_to_blame.push(hunk);

// Nothing to split, so we are done with this hunk.
(None, None)
}
Either::Right((_, after)) => {
Either::Right((hunk, after)) => {
new_hunks_to_blame.push(hunk);

// Keep processing the unblamed range after `added`
(Some(after), None)
}
Expand All @@ -162,17 +162,13 @@ fn process_change(
Either::Left(hunk) => hunk,
Either::Right((before, after)) => {
// Keep looking for the left side of the unblamed portion.
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
after
}
};

// We can 'blame' the overlapping area of `added` and `hunk`.
out.push(BlameEntry::with_offset(
added.start..range_in_suspect.end,
suspect,
hunk_starting_at_added.offset_for(suspect),
));
new_hunks_to_blame.push(hunk_starting_at_added);
// Keep processing `added`, it's portion past `hunk` may still contribute.
(None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted)))
}
Expand All @@ -183,18 +179,20 @@ fn process_change(
// <---> (blamed)
// <--> (new hunk)

out.push(BlameEntry::with_offset(
range_in_suspect.start..added.end,
suspect,
hunk.offset_for(suspect),
));

*offset += added.end - added.start;
*offset -= number_of_lines_deleted;

match hunk.split_at(suspect, added.end) {
Either::Left(_) => (None, None),
Either::Right((_, after)) => (Some(after), None),
Either::Left(hunk) => {
new_hunks_to_blame.push(hunk);

(None, None)
}
Either::Right((before, after)) => {
new_hunks_to_blame.push(before);

(Some(after), None)
}
}
}
(false, false) => {
Expand Down Expand Up @@ -222,7 +220,7 @@ fn process_change(
// <----> (added)

// Retry `hunk` once there is overlapping changes to process.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));

// Let hunks catchup with this change.
(
Expand All @@ -237,11 +235,7 @@ fn process_change(
// <---> (blamed)

// Successfully blame the whole range.
out.push(BlameEntry::with_offset(
range_in_suspect.clone(),
suspect,
hunk.offset_for(suspect),
));
new_hunks_to_blame.push(hunk);

// And keep processing `added` with future `hunks` that might be affected by it.
(
Expand Down Expand Up @@ -279,7 +273,7 @@ fn process_change(
}
Either::Right((before, after)) => {
// `before` isn't affected by deletion, so keep it for later.
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
// after will be affected by offset, and we will see if there are more changes affecting it.
after
}
Expand All @@ -291,7 +285,8 @@ fn process_change(
// | (line_number_in_destination)

// Catchup with changes.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));

(
None,
Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)),
Expand All @@ -300,7 +295,7 @@ fn process_change(
}
(Some(hunk), None) => {
// nothing to do - changes are exhausted, re-evaluate `hunk`.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, None)
}
(None, Some(Change::Unchanged(_))) => {
Expand Down Expand Up @@ -328,10 +323,10 @@ fn process_change(
/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
/// Once a match is found, it's pushed onto `out`.
fn process_changes(
out: &mut Vec<BlameEntry>,
hunks_to_blame: Vec<UnblamedHunk>,
changes: Vec<Change>,
suspect: ObjectId,
parent: ObjectId,
) -> Vec<UnblamedHunk> {
let mut hunks_iter = hunks_to_blame.into_iter();
let mut changes_iter = changes.into_iter();
Expand All @@ -344,10 +339,10 @@ fn process_changes(

loop {
(hunk, change) = process_change(
out,
&mut new_hunks_to_blame,
&mut offset_in_destination,
suspect,
parent,
hunk,
change,
);
Expand Down Expand Up @@ -407,30 +402,20 @@ impl UnblamedHunk {
}
}

fn offset_for(&self, suspect: ObjectId) -> Offset {
let range_in_suspect = self
.suspects
.get(&suspect)
.expect("Internal and we know suspect is present");

if self.range_in_blamed_file.start > range_in_suspect.start {
Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start)
} else {
Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start)
}
}

/// Transfer all ranges from the commit at `from` to the commit at `to`.
fn pass_blame(&mut self, from: ObjectId, to: ObjectId) {
if let Some(range_in_suspect) = self.suspects.remove(&from) {
self.suspects.insert(to, range_in_suspect);
}
}

fn clone_blame(&mut self, from: ObjectId, to: ObjectId) {
// TODO
// Should this also accept `&mut self` as the other functions do?
fn clone_blame(mut self, from: ObjectId, to: ObjectId) -> Self {
if let Some(range_in_suspect) = self.suspects.get(&from) {
self.suspects.insert(to, range_in_suspect.clone());
}
self
}

fn remove_blame(&mut self, suspect: ObjectId) {
Expand All @@ -439,36 +424,6 @@ impl UnblamedHunk {
}

impl BlameEntry {
/// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_source_file`.
fn with_offset(range_in_source_file: Range<u32>, commit_id: ObjectId, offset: Offset) -> Self {
debug_assert!(
range_in_source_file.end > range_in_source_file.start,
"{range_in_source_file:?}"
);

match offset {
Offset::Added(added) => Self {
start_in_blamed_file: range_in_source_file.start + added,
start_in_source_file: range_in_source_file.start,
len: force_non_zero(range_in_source_file.len() as u32),
commit_id,
},
Offset::Deleted(deleted) => {
debug_assert!(
range_in_source_file.start >= deleted,
"{range_in_source_file:?} {offset:?}"
);

Self {
start_in_blamed_file: range_in_source_file.start - deleted,
start_in_source_file: range_in_source_file.start,
len: force_non_zero(range_in_source_file.len() as u32),
commit_id,
}
}
}
}

/// Create an offset from a portion of the *Blamed File*.
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Option<Self> {
let range_in_source_file = unblamed_hunk.suspects.get(&commit_id)?;
Expand Down
Loading
Loading