Skip to content

Commit df7a926

Browse files
committed
add imara-diff::UnifiedDiffBuilder as basis.
It should be the basis for providing a more general way to obtain such diffs.
1 parent 8df0db2 commit df7a926

File tree

4 files changed

+179
-0
lines changed

4 files changed

+179
-0
lines changed

gix-diff/src/blob/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ pub mod pipeline;
1111
///
1212
pub mod platform;
1313

14+
///
15+
pub mod unified_diff;
16+
pub use unified_diff::_impl::UnifiedDiff;
17+
1418
/// Information about the diff performed to detect similarity.
1519
#[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)]
1620
pub struct DiffLineStats {

gix-diff/src/blob/unified_diff.rs

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
//! Originally based on <https://github.com/pascalkuthe/imara-diff/pull/14>.
2+
//!
3+
4+
/// The number of unchanged lines shown around each change in a unified diff.
///
/// This mirrors the `-U` option of `git diff` and GNU diff. Where the context of one change
/// would overlap with that of the previous or next change, it is reduced accordingly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ContextSize {
    /// Amount of context lines printed both before and after each change.
    symmetrical: u32,
}
13+
14+
impl Default for ContextSize {
15+
fn default() -> Self {
16+
ContextSize::symmetrical(3)
17+
}
18+
}
19+
20+
/// Instantiation
21+
impl ContextSize {
22+
/// Create a symmetrical context with `n` lines before and after a changed hunk.
23+
pub fn symmetrical(n: u32) -> Self {
24+
ContextSize { symmetrical: n }
25+
}
26+
}
27+
28+
pub(super) mod _impl {
29+
use imara_diff::{intern, Sink};
30+
use std::fmt::{Display, Write};
31+
use std::hash::Hash;
32+
use std::ops::Range;
33+
34+
use super::ContextSize;
35+
use intern::{InternedInput, Interner, Token};
36+
37+
/// A [`Sink`] that creates a textual diff
38+
/// in the format typically output by git or gnu-diff if the `-u` option is used
39+
pub struct UnifiedDiff<'a, W, T>
40+
where
41+
W: Write,
42+
T: Hash + Eq + Display,
43+
{
44+
before: &'a [Token],
45+
after: &'a [Token],
46+
interner: &'a Interner<T>,
47+
48+
pos: u32,
49+
before_hunk_start: u32,
50+
after_hunk_start: u32,
51+
before_hunk_len: u32,
52+
after_hunk_len: u32,
53+
/// Symmetrical context before and after the changed hunk.
54+
ctx_size: u32,
55+
56+
buffer: String,
57+
dst: W,
58+
}
59+
60+
impl<'a, T> UnifiedDiff<'a, String, T>
61+
where
62+
T: Hash + Eq + Display,
63+
{
64+
/// Create a new `UnifiedDiffBuilder` for the given `input`,
65+
/// displaying `context_size` lines of context around each change,
66+
/// that will return a [`String`].
67+
pub fn new(input: &'a InternedInput<T>, context_size: ContextSize) -> Self {
68+
Self {
69+
before_hunk_start: 0,
70+
after_hunk_start: 0,
71+
before_hunk_len: 0,
72+
after_hunk_len: 0,
73+
buffer: String::with_capacity(8),
74+
dst: String::new(),
75+
interner: &input.interner,
76+
before: &input.before,
77+
after: &input.after,
78+
pos: 0,
79+
ctx_size: context_size.symmetrical,
80+
}
81+
}
82+
}
83+
84+
impl<'a, W, T> UnifiedDiff<'a, W, T>
85+
where
86+
W: Write,
87+
T: Hash + Eq + Display,
88+
{
89+
/// Create a new `UnifiedDiffBuilder` for the given `input`,
90+
/// displaying `context_size` lines of context around each change,
91+
/// that will writes it output to the provided implementation of [`Write`].
92+
pub fn with_writer(input: &'a InternedInput<T>, writer: W, context_size: Option<u32>) -> Self {
93+
Self {
94+
before_hunk_start: 0,
95+
after_hunk_start: 0,
96+
before_hunk_len: 0,
97+
after_hunk_len: 0,
98+
buffer: String::with_capacity(8),
99+
dst: writer,
100+
interner: &input.interner,
101+
before: &input.before,
102+
after: &input.after,
103+
pos: 0,
104+
ctx_size: context_size.unwrap_or(3),
105+
}
106+
}
107+
108+
fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
109+
for &token in tokens {
110+
writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
111+
}
112+
}
113+
114+
fn flush(&mut self) {
115+
if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
116+
return;
117+
}
118+
119+
let end = (self.pos + self.ctx_size).min(self.before.len() as u32);
120+
self.update_pos(end, end);
121+
122+
writeln!(
123+
&mut self.dst,
124+
"@@ -{},{} +{},{} @@",
125+
self.before_hunk_start + 1,
126+
self.before_hunk_len,
127+
self.after_hunk_start + 1,
128+
self.after_hunk_len,
129+
)
130+
.unwrap();
131+
write!(&mut self.dst, "{}", &self.buffer).unwrap();
132+
self.buffer.clear();
133+
self.before_hunk_len = 0;
134+
self.after_hunk_len = 0
135+
}
136+
137+
fn update_pos(&mut self, print_to: u32, move_to: u32) {
138+
self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' ');
139+
let len = print_to - self.pos;
140+
self.pos = move_to;
141+
self.before_hunk_len += len;
142+
self.after_hunk_len += len;
143+
}
144+
}
145+
146+
impl<W, T> Sink for UnifiedDiff<'_, W, T>
147+
where
148+
W: Write,
149+
T: Hash + Eq + Display,
150+
{
151+
type Out = W;
152+
153+
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
154+
if ((self.pos == 0) && (before.start - self.pos > self.ctx_size))
155+
|| (before.start - self.pos > 2 * self.ctx_size)
156+
{
157+
self.flush();
158+
self.pos = before.start - self.ctx_size;
159+
self.before_hunk_start = self.pos;
160+
self.after_hunk_start = after.start - self.ctx_size;
161+
}
162+
self.update_pos(before.start, before.end);
163+
self.before_hunk_len += before.end - before.start;
164+
self.after_hunk_len += after.end - after.start;
165+
self.print_tokens(&self.before[before.start as usize..before.end as usize], '-');
166+
self.print_tokens(&self.after[after.start as usize..after.end as usize], '+');
167+
}
168+
169+
fn finish(mut self) -> Self::Out {
170+
self.flush();
171+
self.dst
172+
}
173+
}
174+
}

gix-diff/tests/diff/blob/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
pub(crate) mod pipeline;
22
mod platform;
3+
mod unified_diff;

gix-diff/tests/diff/blob/unified_diff.rs

Whitespace-only changes.

0 commit comments

Comments
 (0)