Skip to content

Commit 1ccbeef

Browse files
committed
feat: add blob::UnifiedDiff as Sink to build unified diffs.
1 parent df7a926 commit 1ccbeef

File tree

3 files changed

+320
-62
lines changed

3 files changed

+320
-62
lines changed

gix-diff/src/blob/mod.rs

-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ pub mod pipeline;
1111
///
1212
pub mod platform;
1313

14-
///
1514
pub mod unified_diff;
1615
pub use unified_diff::_impl::UnifiedDiff;
1716

gix-diff/src/blob/unified_diff.rs

+142-61
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
//! Originally based on https://github.com/pascalkuthe/imara-diff/pull/14.
1+
//! Facilities to produce the unified diff format.
22
//!
3+
//! Originally based on <https://github.com/pascalkuthe/imara-diff/pull/14>.
34
45
/// Defines the size of the context printed before and after each change.
56
///
@@ -25,21 +26,50 @@ impl ContextSize {
2526
}
2627
}
2728

29+
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
30+
pub trait ConsumeHunk {
31+
/// The item this instance produces after consuming all hunks.
32+
type Out;
33+
34+
/// Consume a single `hunk` in unified diff format, that would be prefixed with `header`.
35+
/// Note that all newlines are already included in `header` and `hunk`.
36+
///
37+
/// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`].
38+
/// After this method returns its first error, it will not be called again.
39+
///
40+
/// The following is hunk-related information and the same that is used in the `header`.
41+
/// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
42+
/// * `before_hunk_len` is the number of lines of this hunk in the old file.
43+
/// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
44+
/// * `after_hunk_len` is the number of lines of this hunk in the new file.
45+
fn consume_hunk(
46+
&mut self,
47+
before_hunk_start: u32,
48+
before_hunk_len: u32,
49+
after_hunk_start: u32,
50+
after_hunk_len: u32,
51+
header: &str,
52+
hunk: &[u8],
53+
) -> std::io::Result<()>;
54+
/// Called after the last hunk is consumed to produce an output.
55+
fn finish(self) -> Self::Out;
56+
}
57+
2858
pub(super) mod _impl {
59+
use super::{ConsumeHunk, ContextSize};
60+
use bstr::{ByteSlice, ByteVec};
2961
use imara_diff::{intern, Sink};
30-
use std::fmt::{Display, Write};
62+
use intern::{InternedInput, Interner, Token};
3163
use std::hash::Hash;
64+
use std::io::ErrorKind;
3265
use std::ops::Range;
3366

34-
use super::ContextSize;
35-
use intern::{InternedInput, Interner, Token};
36-
37-
/// A [`Sink`] that creates a textual diff
38-
/// in the format typically output by git or gnu-diff if the `-u` option is used
39-
pub struct UnifiedDiff<'a, W, T>
67+
/// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
68+
/// and passes it in full to a consumer.
69+
pub struct UnifiedDiff<'a, T, D>
4070
where
41-
W: Write,
42-
T: Hash + Eq + Display,
71+
T: Hash + Eq + AsRef<[u8]>,
72+
D: ConsumeHunk,
4373
{
4474
before: &'a [Token],
4575
after: &'a [Token],
@@ -53,85 +83,92 @@ pub(super) mod _impl {
5383
/// Symmetrical context before and after the changed hunk.
5484
ctx_size: u32,
5585

56-
buffer: String,
57-
dst: W,
86+
buffer: Vec<u8>,
87+
header_buf: String,
88+
delegate: D,
89+
newline: &'a str,
90+
91+
err: Option<std::io::Error>,
5892
}
5993

60-
impl<'a, T> UnifiedDiff<'a, String, T>
94+
impl<'a, T, D> UnifiedDiff<'a, T, D>
6195
where
62-
T: Hash + Eq + Display,
96+
T: Hash + Eq + AsRef<[u8]>,
97+
D: ConsumeHunk,
6398
{
64-
/// Create a new `UnifiedDiffBuilder` for the given `input`,
65-
/// displaying `context_size` lines of context around each change,
66-
/// that will return a [`String`].
67-
pub fn new(input: &'a InternedInput<T>, context_size: ContextSize) -> Self {
99+
/// Create a new instance to create a unified diff using the lines in `input`,
100+
/// which also must be used when running the diff algorithm.
101+
/// `context_size` is the number of lines around each hunk which will be passed
102+
/// to `consume_hunk`.
103+
///
104+
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
105+
pub fn new(
106+
input: &'a InternedInput<T>,
107+
consume_hunk: D,
108+
newline_separator: &'a str,
109+
context_size: ContextSize,
110+
) -> Self {
68111
Self {
69112
before_hunk_start: 0,
70113
after_hunk_start: 0,
71114
before_hunk_len: 0,
72115
after_hunk_len: 0,
73-
buffer: String::with_capacity(8),
74-
dst: String::new(),
116+
buffer: Vec::with_capacity(8),
117+
header_buf: String::new(),
118+
delegate: consume_hunk,
75119
interner: &input.interner,
76120
before: &input.before,
77121
after: &input.after,
78122
pos: 0,
79123
ctx_size: context_size.symmetrical,
80-
}
81-
}
82-
}
124+
newline: newline_separator,
83125

84-
impl<'a, W, T> UnifiedDiff<'a, W, T>
85-
where
86-
W: Write,
87-
T: Hash + Eq + Display,
88-
{
89-
/// Create a new `UnifiedDiffBuilder` for the given `input`,
90-
/// displaying `context_size` lines of context around each change,
91-
/// that will writes it output to the provided implementation of [`Write`].
92-
pub fn with_writer(input: &'a InternedInput<T>, writer: W, context_size: Option<u32>) -> Self {
93-
Self {
94-
before_hunk_start: 0,
95-
after_hunk_start: 0,
96-
before_hunk_len: 0,
97-
after_hunk_len: 0,
98-
buffer: String::with_capacity(8),
99-
dst: writer,
100-
interner: &input.interner,
101-
before: &input.before,
102-
after: &input.after,
103-
pos: 0,
104-
ctx_size: context_size.unwrap_or(3),
126+
err: None,
105127
}
106128
}
107129

108130
fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
109131
for &token in tokens {
110-
writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
132+
self.buffer.push_char(prefix);
133+
self.buffer.push_str(&self.interner[token]);
134+
self.buffer.push_str(self.newline.as_bytes());
111135
}
112136
}
113137

114-
fn flush(&mut self) {
138+
fn flush(&mut self) -> std::io::Result<()> {
115139
if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
116-
return;
140+
return Ok(());
117141
}
118142

119143
let end = (self.pos + self.ctx_size).min(self.before.len() as u32);
120144
self.update_pos(end, end);
121145

122-
writeln!(
123-
&mut self.dst,
124-
"@@ -{},{} +{},{} @@",
146+
self.header_buf.clear();
147+
148+
std::fmt::Write::write_fmt(
149+
&mut self.header_buf,
150+
format_args!(
151+
"@@ -{},{} +{},{} @@{nl}",
152+
self.before_hunk_start + 1,
153+
self.before_hunk_len,
154+
self.after_hunk_start + 1,
155+
self.after_hunk_len,
156+
nl = self.newline
157+
),
158+
)
159+
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;
160+
self.delegate.consume_hunk(
125161
self.before_hunk_start + 1,
126162
self.before_hunk_len,
127163
self.after_hunk_start + 1,
128164
self.after_hunk_len,
129-
)
130-
.unwrap();
131-
write!(&mut self.dst, "{}", &self.buffer).unwrap();
165+
&self.header_buf,
166+
&self.buffer,
167+
)?;
132168
self.buffer.clear();
133169
self.before_hunk_len = 0;
134-
self.after_hunk_len = 0
170+
self.after_hunk_len = 0;
171+
Ok(())
135172
}
136173

137174
fn update_pos(&mut self, print_to: u32, move_to: u32) {
@@ -143,18 +180,24 @@ pub(super) mod _impl {
143180
}
144181
}
145182

146-
impl<W, T> Sink for UnifiedDiff<'_, W, T>
183+
impl<T, D> Sink for UnifiedDiff<'_, T, D>
147184
where
148-
W: Write,
149-
T: Hash + Eq + Display,
185+
T: Hash + Eq + AsRef<[u8]>,
186+
D: ConsumeHunk,
150187
{
151-
type Out = W;
188+
type Out = std::io::Result<D::Out>;
152189

153190
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
191+
if self.err.is_some() {
192+
return;
193+
}
154194
if ((self.pos == 0) && (before.start - self.pos > self.ctx_size))
155195
|| (before.start - self.pos > 2 * self.ctx_size)
156196
{
157-
self.flush();
197+
if let Err(err) = self.flush() {
198+
self.err = Some(err);
199+
return;
200+
}
158201
self.pos = before.start - self.ctx_size;
159202
self.before_hunk_start = self.pos;
160203
self.after_hunk_start = after.start - self.ctx_size;
@@ -167,8 +210,46 @@ pub(super) mod _impl {
167210
}
168211

169212
fn finish(mut self) -> Self::Out {
170-
self.flush();
171-
self.dst
213+
if let Err(err) = self.flush() {
214+
self.err = Some(err);
215+
}
216+
if let Some(err) = self.err {
217+
return Err(err);
218+
}
219+
Ok(self.delegate.finish())
220+
}
221+
}
222+
223+
/// An implementation that fails if the input isn't UTF-8.
224+
impl ConsumeHunk for String {
225+
type Out = Self;
226+
227+
fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
228+
self.push_str(header);
229+
self.push_str(
230+
hunk.to_str()
231+
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?,
232+
);
233+
Ok(())
234+
}
235+
236+
fn finish(self) -> Self::Out {
237+
self
238+
}
239+
}
240+
241+
/// An implementation that writes hunks into a byte buffer.
242+
impl ConsumeHunk for Vec<u8> {
243+
type Out = Self;
244+
245+
fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
246+
self.push_str(header);
247+
self.push_str(hunk);
248+
Ok(())
249+
}
250+
251+
fn finish(self) -> Self::Out {
252+
self
172253
}
173254
}
174255
}

0 commit comments

Comments
 (0)