Skip to content

various improvements #1913

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions gix-diff/src/blob/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,13 @@ impl WorktreeRoots {
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub enum Data {
/// The data to use for diffing was written into the buffer that was passed during the call to [`Pipeline::convert_to_diffable()`].
Buffer,
Buffer {
/// If `true`, a [binary to text filter](Driver::binary_to_text_command) was used to obtain the buffer,
/// making it a derived value.
///
/// Applications should check for this to avoid treating the buffer content as (original) resource content.
is_derived: bool,
},
/// The size that the binary blob had at the given revision, without having applied filters, as it's either
/// considered binary or above the big-file threshold.
///
Expand Down Expand Up @@ -274,7 +280,7 @@ impl Pipeline {
})?;
target.map(|target| {
out.extend_from_slice(gix_path::into_bstr(target).as_ref());
Data::Buffer
Data::Buffer { is_derived: false }
})
} else {
let need_size_only = is_binary == Some(true);
Expand Down Expand Up @@ -312,7 +318,7 @@ impl Pipeline {
None
} else {
run_cmd(rela_path, cmd, out)?;
Some(Data::Buffer)
Some(Data::Buffer { is_derived: true })
}
}
None => {
Expand Down Expand Up @@ -370,7 +376,7 @@ impl Pipeline {
out.clear();
Data::Binary { size }
} else {
Data::Buffer
Data::Buffer { is_derived: false }
})
}
None => None,
Expand Down Expand Up @@ -403,6 +409,7 @@ impl Pipeline {
.try_find(id, out)
.map_err(gix_object::find::existing_object::Error::Find)?
.ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
let mut is_derived = false;
if matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable)
&& convert == Mode::ToWorktreeAndBinaryToText
|| (convert == Mode::ToGitUnlessBinaryToTextIsPresent
Expand Down Expand Up @@ -460,6 +467,7 @@ impl Pipeline {
})?;
out.clear();
run_cmd(rela_path, cmd, out)?;
is_derived = true;
}
None => match res {
ToWorktreeOutcome::Unchanged(_) => {}
Expand Down Expand Up @@ -490,7 +498,7 @@ impl Pipeline {
out.clear();
Data::Binary { size }
} else {
Data::Buffer
Data::Buffer { is_derived }
}
};
Some(data)
Expand Down
48 changes: 38 additions & 10 deletions gix-diff/src/blob/platform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ pub mod resource {
Resource {
driver_index: value.conversion.driver_index,
data: value.conversion.data.map_or(Data::Missing, |data| match data {
pipeline::Data::Buffer => Data::Buffer(&value.buffer),
pipeline::Data::Buffer { is_derived } => Data::Buffer {
buf: &value.buffer,
is_derived,
},
pipeline::Data::Binary { size } => Data::Binary { size },
}),
mode: value.mode,
Expand Down Expand Up @@ -137,7 +140,15 @@ pub mod resource {
/// The object is missing, either because it didn't exist in the working tree or because its `id` was null.
Missing,
/// The textual data as processed to be in a diffable state.
Buffer(&'a [u8]),
Buffer {
/// The buffer bytes.
buf: &'a [u8],
/// If `true`, a [binary to text filter](super::super::Driver::binary_to_text_command) was used to obtain the buffer,
/// making it a derived value.
///
/// Applications should check for this to avoid treating the buffer content as (original) resource content.
is_derived: bool,
},
/// The size that the binary blob had at the given revision, without having applied filters, as it's either
/// considered binary or above the big-file threshold.
///
Expand All @@ -156,10 +167,18 @@ pub mod resource {
/// Return ourselves as slice of bytes if this instance stores data.
pub fn as_slice(&self) -> Option<&'a [u8]> {
match self {
Data::Buffer(d) => Some(d),
Data::Buffer { buf, .. } => Some(buf),
Data::Binary { .. } | Data::Missing => None,
}
}

/// Returns `true` if the data in this instance is derived.
pub fn is_derived(&self) -> bool {
match self {
Data::Missing | Data::Binary { .. } => false,
Data::Buffer { is_derived, .. } => *is_derived,
}
}
}
}

Expand Down Expand Up @@ -194,9 +213,8 @@ pub mod set_resource {

///
pub mod prepare_diff {
use bstr::BStr;

use crate::blob::platform::Resource;
use bstr::BStr;

/// The kind of operation that should be performed based on the configuration of the resources involved in the diff.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
Expand Down Expand Up @@ -234,6 +252,11 @@ pub mod prepare_diff {
pub struct Outcome<'a> {
/// The kind of diff that was actually performed. This may include skipping the internal diff as well.
pub operation: Operation<'a>,
/// If `true`, a [binary to text filter](super::super::Driver::binary_to_text_command) was used to obtain the buffer
/// of `old` or `new`, making it a derived value.
///
/// Applications should check for this to avoid treating the buffer content as (original) resource content.
pub old_or_new_is_derived: bool,
/// The old or source of the diff operation.
pub old: Resource<'a>,
/// The new or destination of the diff operation.
Expand Down Expand Up @@ -421,7 +444,7 @@ impl Platform {
cmd.args(["/dev/null", ".", "."]);
None
}
resource::Data::Buffer(buf) => {
resource::Data::Buffer { buf, is_derived: _ } => {
let mut tmp = gix_tempfile::new(
std::env::temp_dir(),
gix_tempfile::ContainingDirectory::Exists,
Expand Down Expand Up @@ -524,10 +547,15 @@ impl Platform {
.diff_cache
.get(new_key)
.ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
let mut out = prepare_diff::Outcome {
operation: prepare_diff::Operation::SourceOrDestinationIsBinary,
old: Resource::new(old_key, old),
new: Resource::new(new_key, new),
let mut out = {
let old = Resource::new(old_key, old);
let new = Resource::new(new_key, new);
prepare_diff::Outcome {
operation: prepare_diff::Operation::SourceOrDestinationIsBinary,
old_or_new_is_derived: old.data.is_derived() || new.data.is_derived(),
old,
new,
}
};

match (old.conversion.data, new.conversion.data) {
Expand Down
59 changes: 37 additions & 22 deletions gix-diff/tests/diff/blob/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert!(out.driver_index.is_none(), "there was no driver");
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(buf.as_bstr(), a_content, "there is no transformations configured");

let link_name = "link";
Expand All @@ -62,7 +62,7 @@ pub(crate) mod convert_to_diffable {
)?;

assert!(out.driver_index.is_none());
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
a_name,
Expand All @@ -86,7 +86,7 @@ pub(crate) mod convert_to_diffable {
)?;

assert!(out.driver_index.is_none(), "there was no driver");
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(buf.as_bstr(), b_content, "there is no transformations configured");
}

Expand Down Expand Up @@ -203,7 +203,7 @@ pub(crate) mod convert_to_diffable {
assert!(out.driver_index.is_none());
assert_eq!(
out.data,
Some(pipeline::Data::Buffer),
Some(pipeline::Data::Buffer { is_derived: false }),
"links are always read and never considered large"
);
assert_eq!(buf.as_bstr(), large_content);
Expand Down Expand Up @@ -340,7 +340,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert!(out.driver_index.is_none(), "there was no driver");
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
a_content,
Expand All @@ -361,7 +361,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert!(out.driver_index.is_none(), "there was no driver");
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
"a\nb",
Expand All @@ -386,7 +386,7 @@ pub(crate) mod convert_to_diffable {
)?;

assert!(out.driver_index.is_none());
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
link_content,
Expand All @@ -411,7 +411,7 @@ pub(crate) mod convert_to_diffable {
)?;

assert!(out.driver_index.is_none(), "there was no driver");
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(buf.as_bstr(), "b-content\r\n", "LF to CRLF by worktree filtering");

let mut db = ObjectDb::default();
Expand All @@ -429,7 +429,7 @@ pub(crate) mod convert_to_diffable {
)?;

assert!(out.driver_index.is_none(), "there was no driver");
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(buf.as_bstr(), "b\n", "no filtering was performed at all");

Ok(())
Expand Down Expand Up @@ -524,11 +524,11 @@ pub(crate) mod convert_to_diffable {
)?;

assert_eq!(out.driver_index, Some(0));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true }));
assert_eq!(
buf.as_bstr(),
"\0b",
"if binary-text-conversion is set, we don't care if it outputs zero bytes, let everything pass"
"if binary-text-conversion is set, we don't care if it outputs null-bytes, let everything pass"
);

Ok(())
Expand Down Expand Up @@ -613,7 +613,12 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(0));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(
out.data,
Some(pipeline::Data::Buffer {
is_derived: !matches!(mode, pipeline::Mode::ToGit)
})
);
assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied");
}

Expand All @@ -630,7 +635,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(0));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(buf.as_bstr(), "a\n", "unconditionally use git according to mode");

let id = db.insert("a\n");
Expand All @@ -648,7 +653,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(0));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true }));
assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied");
}

Expand All @@ -665,7 +670,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(0));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
"a\n",
Expand Down Expand Up @@ -723,7 +728,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information");
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
"link-target",
Expand Down Expand Up @@ -796,7 +801,12 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(2));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(
out.data,
Some(pipeline::Data::Buffer {
is_derived: !matches!(mode, pipeline::Mode::ToGit)
})
);
assert_eq!(
buf.as_bstr(),
"to-text\nc\n",
Expand All @@ -819,7 +829,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(2));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true }));
assert_eq!(
buf.as_bstr(),
"to-text\nc\n",
Expand Down Expand Up @@ -895,7 +905,12 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(3));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(
out.data,
Some(pipeline::Data::Buffer {
is_derived: !matches!(mode, pipeline::Mode::ToGit)
})
);
assert_eq!(
buf.as_bstr(),
"to-text\nd\n",
Expand All @@ -915,7 +930,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, Some(3));
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: true }));
assert_eq!(
buf.as_bstr(),
"to-text\nd-in-db",
Expand All @@ -937,7 +952,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, None);
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
"e\n",
Expand All @@ -958,7 +973,7 @@ pub(crate) mod convert_to_diffable {
&mut buf,
)?;
assert_eq!(out.driver_index, None);
assert_eq!(out.data, Some(pipeline::Data::Buffer));
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
assert_eq!(
buf.as_bstr(),
"e-in-db",
Expand Down
Loading
Loading