1
+ //! Facilities to produce the unified diff format.
1
2
//! Originally based on https://github.com/pascalkuthe/imara-diff/pull/14.
2
3
//!
3
4
@@ -25,21 +26,50 @@ impl ContextSize {
25
26
}
26
27
}
27
28
29
/// The receiving end of a [`UnifiedDiff`](super::UnifiedDiff): it is handed one fully rendered hunk at a time
/// and finally produces a value of its own choosing.
pub trait ConsumeHunk {
    /// The value produced by [`finish()`](ConsumeHunk::finish) once every hunk was consumed.
    type Out;

    /// Consume a single `hunk` in unified diff format, which is meant to be preceded by `header`.
    /// Both already contain all of their newlines.
    ///
    /// The numeric arguments repeat the information that is rendered into `header`:
    ///
    /// * `before_hunk_start` - the 1-based first line of this hunk in the old file.
    /// * `before_hunk_len` - the amount of lines of this hunk in the old file.
    /// * `after_hunk_start` - the 1-based first line of this hunk in the new file.
    /// * `after_hunk_len` - the amount of lines of this hunk in the new file.
    ///
    /// # Errors
    ///
    /// The [`UnifiedDiff`](super::UnifiedDiff) sink wraps its output in an [`std::io::Result`],
    /// and once this method returned its first error it will not be called anymore.
    fn consume_hunk(
        &mut self,
        before_hunk_start: u32,
        before_hunk_len: u32,
        after_hunk_start: u32,
        after_hunk_len: u32,
        header: &str,
        hunk: &[u8],
    ) -> std::io::Result<()>;

    /// Called after the last hunk was consumed to turn this instance into its output.
    fn finish(self) -> Self::Out;
}
57
+
28
58
pub ( super ) mod _impl {
59
+ use super :: { ConsumeHunk , ContextSize } ;
60
+ use bstr:: { ByteSlice , ByteVec } ;
29
61
use imara_diff:: { intern, Sink } ;
30
- use std :: fmt :: { Display , Write } ;
62
+ use intern :: { InternedInput , Interner , Token } ;
31
63
use std:: hash:: Hash ;
64
+ use std:: io:: ErrorKind ;
32
65
use std:: ops:: Range ;
33
66
34
- use super :: ContextSize ;
35
- use intern:: { InternedInput , Interner , Token } ;
36
-
37
- /// A [`Sink`] that creates a textual diff
38
- /// in the format typically output by git or gnu-diff if the `-u` option is used
39
- pub struct UnifiedDiff < ' a , W , T >
67
+ /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
68
+ /// and passes it in full to a consumer.
69
+ pub struct UnifiedDiff < ' a , T , D >
40
70
where
41
- W : Write ,
42
- T : Hash + Eq + Display ,
71
+ T : Hash + Eq + AsRef < [ u8 ] > ,
72
+ D : ConsumeHunk ,
43
73
{
44
74
before : & ' a [ Token ] ,
45
75
after : & ' a [ Token ] ,
@@ -53,85 +83,91 @@ pub(super) mod _impl {
53
83
/// Symmetrical context before and after the changed hunk.
54
84
ctx_size : u32 ,
55
85
56
- buffer : String ,
57
- dst : W ,
86
+ buffer : Vec < u8 > ,
87
+ header_buf : String ,
88
+ delegate : D ,
89
+ newline : & ' a str ,
90
+
91
+ err : Option < std:: io:: Error > ,
58
92
}
59
93
60
- impl < ' a , T > UnifiedDiff < ' a , String , T >
94
+ impl < ' a , T , D > UnifiedDiff < ' a , T , D >
61
95
where
62
- T : Hash + Eq + Display ,
96
+ T : Hash + Eq + AsRef < [ u8 ] > ,
97
+ D : ConsumeHunk ,
63
98
{
64
99
/// Create a new `UnifiedDiffBuilder` for the given `input`,
65
100
/// displaying `context_size` lines of context around each change,
66
- /// that will return a [`String`].
67
- pub fn new ( input : & ' a InternedInput < T > , context_size : ContextSize ) -> Self {
101
+ /// that will write it output to the provided implementation of [`Write`].
102
+ ///
103
+ /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`,
104
+ pub fn new (
105
+ input : & ' a InternedInput < T > ,
106
+ consume_hunk : D ,
107
+ newline_separator : & ' a str ,
108
+ context_size : ContextSize ,
109
+ ) -> Self {
68
110
Self {
69
111
before_hunk_start : 0 ,
70
112
after_hunk_start : 0 ,
71
113
before_hunk_len : 0 ,
72
114
after_hunk_len : 0 ,
73
- buffer : String :: with_capacity ( 8 ) ,
74
- dst : String :: new ( ) ,
115
+ buffer : Vec :: with_capacity ( 8 ) ,
116
+ header_buf : String :: new ( ) ,
117
+ delegate : consume_hunk,
75
118
interner : & input. interner ,
76
119
before : & input. before ,
77
120
after : & input. after ,
78
121
pos : 0 ,
79
122
ctx_size : context_size. symmetrical ,
80
- }
81
- }
82
- }
123
+ newline : newline_separator,
83
124
84
- impl < ' a , W , T > UnifiedDiff < ' a , W , T >
85
- where
86
- W : Write ,
87
- T : Hash + Eq + Display ,
88
- {
89
- /// Create a new `UnifiedDiffBuilder` for the given `input`,
90
- /// displaying `context_size` lines of context around each change,
91
- /// that will writes it output to the provided implementation of [`Write`].
92
- pub fn with_writer ( input : & ' a InternedInput < T > , writer : W , context_size : Option < u32 > ) -> Self {
93
- Self {
94
- before_hunk_start : 0 ,
95
- after_hunk_start : 0 ,
96
- before_hunk_len : 0 ,
97
- after_hunk_len : 0 ,
98
- buffer : String :: with_capacity ( 8 ) ,
99
- dst : writer,
100
- interner : & input. interner ,
101
- before : & input. before ,
102
- after : & input. after ,
103
- pos : 0 ,
104
- ctx_size : context_size. unwrap_or ( 3 ) ,
125
+ err : None ,
105
126
}
106
127
}
107
128
108
129
fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
109
130
for & token in tokens {
110
- writeln ! ( & mut self . buffer, "{prefix}{}" , self . interner[ token] ) . unwrap ( ) ;
131
+ self . buffer . push_char ( prefix) ;
132
+ self . buffer . push_str ( & self . interner [ token] ) ;
133
+ self . buffer . push_str ( self . newline . as_bytes ( ) ) ;
111
134
}
112
135
}
113
136
114
- fn flush ( & mut self ) {
137
+ fn flush ( & mut self ) -> std :: io :: Result < ( ) > {
115
138
if self . before_hunk_len == 0 && self . after_hunk_len == 0 {
116
- return ;
139
+ return Ok ( ( ) ) ;
117
140
}
118
141
119
142
let end = ( self . pos + self . ctx_size ) . min ( self . before . len ( ) as u32 ) ;
120
143
self . update_pos ( end, end) ;
121
144
122
- writeln ! (
123
- & mut self . dst,
124
- "@@ -{},{} +{},{} @@" ,
145
+ self . header_buf . clear ( ) ;
146
+
147
+ std:: fmt:: Write :: write_fmt (
148
+ & mut self . header_buf ,
149
+ format_args ! (
150
+ "@@ -{},{} +{},{} @@{nl}" ,
151
+ self . before_hunk_start + 1 ,
152
+ self . before_hunk_len,
153
+ self . after_hunk_start + 1 ,
154
+ self . after_hunk_len,
155
+ nl = self . newline
156
+ ) ,
157
+ )
158
+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
159
+ self . delegate . consume_hunk (
125
160
self . before_hunk_start + 1 ,
126
161
self . before_hunk_len ,
127
162
self . after_hunk_start + 1 ,
128
163
self . after_hunk_len ,
129
- )
130
- . unwrap ( ) ;
131
- write ! ( & mut self . dst , "{}" , & self . buffer ) . unwrap ( ) ;
164
+ & self . header_buf ,
165
+ & self . buffer ,
166
+ ) ? ;
132
167
self . buffer . clear ( ) ;
133
168
self . before_hunk_len = 0 ;
134
- self . after_hunk_len = 0
169
+ self . after_hunk_len = 0 ;
170
+ Ok ( ( ) )
135
171
}
136
172
137
173
fn update_pos ( & mut self , print_to : u32 , move_to : u32 ) {
@@ -143,18 +179,24 @@ pub(super) mod _impl {
143
179
}
144
180
}
145
181
146
- impl < W , T > Sink for UnifiedDiff < ' _ , W , T >
182
+ impl < T , D > Sink for UnifiedDiff < ' _ , T , D >
147
183
where
148
- W : Write ,
149
- T : Hash + Eq + Display ,
184
+ T : Hash + Eq + AsRef < [ u8 ] > ,
185
+ D : ConsumeHunk ,
150
186
{
151
- type Out = W ;
187
+ type Out = std :: io :: Result < D :: Out > ;
152
188
153
189
fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
190
+ if self . err . is_some ( ) {
191
+ return ;
192
+ }
154
193
if ( ( self . pos == 0 ) && ( before. start - self . pos > self . ctx_size ) )
155
194
|| ( before. start - self . pos > 2 * self . ctx_size )
156
195
{
157
- self . flush ( ) ;
196
+ if let Err ( err) = self . flush ( ) {
197
+ self . err = Some ( err) ;
198
+ return ;
199
+ }
158
200
self . pos = before. start - self . ctx_size ;
159
201
self . before_hunk_start = self . pos ;
160
202
self . after_hunk_start = after. start - self . ctx_size ;
@@ -167,8 +209,46 @@ pub(super) mod _impl {
167
209
}
168
210
169
211
/// Flush the last pending hunk and turn the delegate into its output.
///
/// # Errors
///
/// Returns the first error encountered while handing hunks to the delegate,
/// either from a previous `process_change()` call or from flushing the last hunk.
fn finish(mut self) -> Self::Out {
    // A previous flush already failed: report that error right away.
    // Calling `flush()` again would hand the still-buffered hunk to the delegate once more,
    // even though it must not be called again after its first error.
    if let Some(err) = self.err.take() {
        return Err(err);
    }
    self.flush()?;
    Ok(self.delegate.finish())
}
220
+ }
221
+
222
+ /// An implementation that fails if the input isn't UTF-8.
223
+ impl ConsumeHunk for String {
224
+ type Out = Self ;
225
+
226
+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
227
+ self . push_str ( header) ;
228
+ self . push_str (
229
+ hunk. to_str ( )
230
+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?,
231
+ ) ;
232
+ Ok ( ( ) )
233
+ }
234
+
235
+ fn finish ( self ) -> Self :: Out {
236
+ self
237
+ }
238
+ }
239
+
240
+ /// An implementation that writes hunks into a byte buffer.
241
+ impl ConsumeHunk for Vec < u8 > {
242
+ type Out = Self ;
243
+
244
+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
245
+ self . push_str ( header) ;
246
+ self . push_str ( hunk) ;
247
+ Ok ( ( ) )
248
+ }
249
+
250
+ fn finish ( self ) -> Self :: Out {
251
+ self
172
252
}
173
253
}
174
254
}
0 commit comments