Skip to content

Commit 848e92a

Browse files
committed
feat: add blob::UnifiedDiff as Sink to build unified diffs.
1 parent df7a926 commit 848e92a

File tree

2 files changed

+315
-58
lines changed

2 files changed

+315
-58
lines changed

gix-diff/src/blob/unified_diff.rs

Lines changed: 138 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
//! Facilities to produce the unified diff format.
12
//! Originally based on <https://github.com/pascalkuthe/imara-diff/pull/14>.
23
//!
34
@@ -25,21 +26,50 @@ impl ContextSize {
2526
}
2627
}
2728

29+
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
///
/// An implementation receives one call per hunk and is turned into its final output once all hunks were seen.
30+
pub trait ConsumeHunk {
31+
/// The item this instance produces after consuming all hunks.
32+
type Out;
33+
34+
/// Consume a single `hunk` in unified diff format, that would be prefixed with `header`.
35+
/// Note that all newlines are already added to both `header` and `hunk`.
36+
///
37+
/// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`].
38+
/// After this method returns its first error, it will not be called again.
39+
///
40+
/// The following is hunk-related information and the same that is used in the `header`.
41+
/// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
42+
/// * `before_hunk_len` is the number of lines of this hunk in the old file.
43+
/// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
44+
/// * `after_hunk_len` is the number of lines of this hunk in the new file.
///
/// `header` has the form `@@ -<before_hunk_start>,<before_hunk_len> +<after_hunk_start>,<after_hunk_len> @@`,
/// followed by the configured newline. `hunk` holds the lines of the hunk as bytes, each starting with
/// a one-character prefix and ending with the configured newline.
45+
fn consume_hunk(
46+
&mut self,
47+
before_hunk_start: u32,
48+
before_hunk_len: u32,
49+
after_hunk_start: u32,
50+
after_hunk_len: u32,
51+
header: &str,
52+
hunk: &[u8],
53+
) -> std::io::Result<()>;
54+
/// Called after the last hunk is consumed to produce an output.
55+
fn finish(self) -> Self::Out;
56+
}
57+
2858
pub(super) mod _impl {
59+
use super::{ConsumeHunk, ContextSize};
60+
use bstr::{ByteSlice, ByteVec};
2961
use imara_diff::{intern, Sink};
30-
use std::fmt::{Display, Write};
62+
use intern::{InternedInput, Interner, Token};
3163
use std::hash::Hash;
64+
use std::io::ErrorKind;
3265
use std::ops::Range;
3366

34-
use super::ContextSize;
35-
use intern::{InternedInput, Interner, Token};
36-
37-
/// A [`Sink`] that creates a textual diff
38-
/// in the format typically output by git or gnu-diff if the `-u` option is used
39-
pub struct UnifiedDiff<'a, W, T>
67+
/// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
68+
/// and passes it in full to a consumer.
69+
pub struct UnifiedDiff<'a, T, D>
4070
where
41-
W: Write,
42-
T: Hash + Eq + Display,
71+
T: Hash + Eq + AsRef<[u8]>,
72+
D: ConsumeHunk,
4373
{
4474
before: &'a [Token],
4575
after: &'a [Token],
@@ -53,85 +83,91 @@ pub(super) mod _impl {
5383
/// Symmetrical context before and after the changed hunk.
5484
ctx_size: u32,
5585

56-
buffer: String,
57-
dst: W,
86+
buffer: Vec<u8>,
87+
header_buf: String,
88+
delegate: D,
89+
newline: &'a str,
90+
91+
err: Option<std::io::Error>,
5892
}
5993

60-
impl<'a, T> UnifiedDiff<'a, String, T>
94+
impl<'a, T, D> UnifiedDiff<'a, T, D>
6195
where
62-
T: Hash + Eq + Display,
96+
T: Hash + Eq + AsRef<[u8]>,
97+
D: ConsumeHunk,
6398
{
6499
/// Create a new `UnifiedDiff` for the given `input`,
65100
/// displaying `context_size` lines of context around each change,
66-
/// that will return a [`String`].
67-
pub fn new(input: &'a InternedInput<T>, context_size: ContextSize) -> Self {
101+
/// that will pass its output to the provided implementation of [`ConsumeHunk`].
102+
///
103+
/// `consume_hunk` is called for each hunk in unified-diff format, in which each line is terminated by `newline_separator`.
104+
pub fn new(
105+
input: &'a InternedInput<T>,
106+
consume_hunk: D,
107+
newline_separator: &'a str,
108+
context_size: ContextSize,
109+
) -> Self {
68110
Self {
69111
before_hunk_start: 0,
70112
after_hunk_start: 0,
71113
before_hunk_len: 0,
72114
after_hunk_len: 0,
73-
buffer: String::with_capacity(8),
74-
dst: String::new(),
115+
buffer: Vec::with_capacity(8),
116+
header_buf: String::new(),
117+
delegate: consume_hunk,
75118
interner: &input.interner,
76119
before: &input.before,
77120
after: &input.after,
78121
pos: 0,
79122
ctx_size: context_size.symmetrical,
80-
}
81-
}
82-
}
123+
newline: newline_separator,
83124

84-
impl<'a, W, T> UnifiedDiff<'a, W, T>
85-
where
86-
W: Write,
87-
T: Hash + Eq + Display,
88-
{
89-
/// Create a new `UnifiedDiffBuilder` for the given `input`,
90-
/// displaying `context_size` lines of context around each change,
91-
/// that will writes it output to the provided implementation of [`Write`].
92-
pub fn with_writer(input: &'a InternedInput<T>, writer: W, context_size: Option<u32>) -> Self {
93-
Self {
94-
before_hunk_start: 0,
95-
after_hunk_start: 0,
96-
before_hunk_len: 0,
97-
after_hunk_len: 0,
98-
buffer: String::with_capacity(8),
99-
dst: writer,
100-
interner: &input.interner,
101-
before: &input.before,
102-
after: &input.after,
103-
pos: 0,
104-
ctx_size: context_size.unwrap_or(3),
125+
err: None,
105126
}
106127
}
107128

108129
fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
109130
for &token in tokens {
110-
writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
131+
self.buffer.push_char(prefix);
132+
self.buffer.push_str(&self.interner[token]);
133+
self.buffer.push_str(self.newline.as_bytes());
111134
}
112135
}
113136

114-
fn flush(&mut self) {
137+
fn flush(&mut self) -> std::io::Result<()> {
115138
if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
116-
return;
139+
return Ok(());
117140
}
118141

119142
let end = (self.pos + self.ctx_size).min(self.before.len() as u32);
120143
self.update_pos(end, end);
121144

122-
writeln!(
123-
&mut self.dst,
124-
"@@ -{},{} +{},{} @@",
145+
self.header_buf.clear();
146+
147+
std::fmt::Write::write_fmt(
148+
&mut self.header_buf,
149+
format_args!(
150+
"@@ -{},{} +{},{} @@{nl}",
151+
self.before_hunk_start + 1,
152+
self.before_hunk_len,
153+
self.after_hunk_start + 1,
154+
self.after_hunk_len,
155+
nl = self.newline
156+
),
157+
)
158+
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;
159+
self.delegate.consume_hunk(
125160
self.before_hunk_start + 1,
126161
self.before_hunk_len,
127162
self.after_hunk_start + 1,
128163
self.after_hunk_len,
129-
)
130-
.unwrap();
131-
write!(&mut self.dst, "{}", &self.buffer).unwrap();
164+
&self.header_buf,
165+
&self.buffer,
166+
)?;
132167
self.buffer.clear();
133168
self.before_hunk_len = 0;
134-
self.after_hunk_len = 0
169+
self.after_hunk_len = 0;
170+
Ok(())
135171
}
136172

137173
fn update_pos(&mut self, print_to: u32, move_to: u32) {
@@ -143,18 +179,24 @@ pub(super) mod _impl {
143179
}
144180
}
145181

146-
impl<W, T> Sink for UnifiedDiff<'_, W, T>
182+
impl<T, D> Sink for UnifiedDiff<'_, T, D>
147183
where
148-
W: Write,
149-
T: Hash + Eq + Display,
184+
T: Hash + Eq + AsRef<[u8]>,
185+
D: ConsumeHunk,
150186
{
151-
type Out = W;
187+
type Out = std::io::Result<D::Out>;
152188

153189
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
190+
if self.err.is_some() {
191+
return;
192+
}
154193
if ((self.pos == 0) && (before.start - self.pos > self.ctx_size))
155194
|| (before.start - self.pos > 2 * self.ctx_size)
156195
{
157-
self.flush();
196+
if let Err(err) = self.flush() {
197+
self.err = Some(err);
198+
return;
199+
}
158200
self.pos = before.start - self.ctx_size;
159201
self.before_hunk_start = self.pos;
160202
self.after_hunk_start = after.start - self.ctx_size;
@@ -167,8 +209,46 @@ pub(super) mod _impl {
167209
}
168210

169211
fn finish(mut self) -> Self::Out {
170-
self.flush();
171-
self.dst
212+
if let Err(err) = self.flush() {
213+
self.err = Some(err);
214+
}
215+
if let Some(err) = self.err {
216+
return Err(err);
217+
}
218+
Ok(self.delegate.finish())
219+
}
220+
}
221+
222+
/// An implementation that collects all hunks into a `String` and fails if the input isn't UTF-8.
///
/// NOTE(review): `header` is appended before `hunk` is validated, so when an error is returned
/// the string already contains the header of the offending hunk.
223+
impl ConsumeHunk for String {
224+
/// The string holding everything that was consumed.
type Out = Self;
225+
226+
/// Append `header` followed by `hunk` to this string; the four hunk-position arguments are ignored.
///
/// If `hunk` cannot be converted to a `str`, the conversion error is returned wrapped
/// in an [`std::io::Error`] of kind [`ErrorKind::Other`].
fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
227+
self.push_str(header);
228+
self.push_str(
229+
hunk.to_str()
230+
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?,
231+
);
232+
Ok(())
233+
}
234+
235+
/// Return the accumulated string unchanged.
fn finish(self) -> Self::Out {
236+
self
237+
}
238+
}
239+
240+
/// An implementation that writes hunks into a byte buffer.
///
/// Headers and hunks are appended verbatim, so no encoding is required of the diffed content.
241+
impl ConsumeHunk for Vec<u8> {
242+
/// The byte buffer holding everything that was consumed.
type Out = Self;
243+
244+
/// Append the bytes of `header` followed by `hunk` to this buffer; the four hunk-position
/// arguments are ignored. This implementation always returns `Ok(())`.
fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
245+
self.push_str(header);
246+
self.push_str(hunk);
247+
Ok(())
248+
}
249+
250+
/// Return the accumulated buffer unchanged.
fn finish(self) -> Self::Out {
251+
self
172252
}
173253
}
174254
}

0 commit comments

Comments
 (0)