1
- //! Originally based on https://github.com/pascalkuthe/imara- diff/pull/14 .
1
+ //! Facilities to produce the unified diff format.
2
2
//!
3
+ //! Originally based on <https://github.com/pascalkuthe/imara-diff/pull/14>.
3
4
4
5
/// Defines the size of the context printed before and after each change.
5
6
///
@@ -25,21 +26,50 @@ impl ContextSize {
25
26
}
26
27
}
27
28
29
/// The receiving end of [`UnifiedDiff`](super::UnifiedDiff): it is handed one hunk at a time.
pub trait ConsumeHunk {
    /// What [`finish()`](ConsumeHunk::finish) yields once all hunks have been consumed.
    type Out;

    /// Process one `hunk`, given as bytes in unified diff format, together with the `header`
    /// line that would precede it. Newlines are already part of both `header` and `hunk`.
    ///
    /// Errors returned here surface through the [`UnifiedDiff`](super::UnifiedDiff) sink, whose
    /// own output is an [`std::io::Result`]. Once this method has returned its first error,
    /// it is not supposed to be called again.
    ///
    /// The numeric arguments carry the same hunk information that is rendered into `header`:
    ///
    /// * `before_hunk_start` - the 1-based first line of this hunk in the old file.
    /// * `before_hunk_len` - the number of lines of this hunk in the old file.
    /// * `after_hunk_start` - the 1-based first line of this hunk in the new file.
    /// * `after_hunk_len` - the number of lines of this hunk in the new file.
    fn consume_hunk(
        &mut self,
        before_hunk_start: u32,
        before_hunk_len: u32,
        after_hunk_start: u32,
        after_hunk_len: u32,
        header: &str,
        hunk: &[u8],
    ) -> std::io::Result<()>;

    /// Invoked after the last hunk was consumed, turning this instance into its output.
    fn finish(self) -> Self::Out;
}
57
+
28
58
pub ( super ) mod _impl {
59
+ use super :: { ConsumeHunk , ContextSize } ;
60
+ use bstr:: { ByteSlice , ByteVec } ;
29
61
use imara_diff:: { intern, Sink } ;
30
- use std :: fmt :: { Display , Write } ;
62
+ use intern :: { InternedInput , Interner , Token } ;
31
63
use std:: hash:: Hash ;
64
+ use std:: io:: ErrorKind ;
32
65
use std:: ops:: Range ;
33
66
34
- use super :: ContextSize ;
35
- use intern:: { InternedInput , Interner , Token } ;
36
-
37
- /// A [`Sink`] that creates a textual diff
38
- /// in the format typically output by git or gnu-diff if the `-u` option is used
39
- pub struct UnifiedDiff < ' a , W , T >
67
+ /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
68
+ /// and passes it in full to a consumer.
69
+ pub struct UnifiedDiff < ' a , T , D >
40
70
where
41
- W : Write ,
42
- T : Hash + Eq + Display ,
71
+ T : Hash + Eq + AsRef < [ u8 ] > ,
72
+ D : ConsumeHunk ,
43
73
{
44
74
before : & ' a [ Token ] ,
45
75
after : & ' a [ Token ] ,
@@ -53,85 +83,92 @@ pub(super) mod _impl {
53
83
/// Symmetrical context before and after the changed hunk.
54
84
ctx_size : u32 ,
55
85
56
- buffer : String ,
57
- dst : W ,
86
+ buffer : Vec < u8 > ,
87
+ header_buf : String ,
88
+ delegate : D ,
89
+ newline : & ' a str ,
90
+
91
+ err : Option < std:: io:: Error > ,
58
92
}
59
93
60
- impl < ' a , T > UnifiedDiff < ' a , String , T >
94
+ impl < ' a , T , D > UnifiedDiff < ' a , T , D >
61
95
where
62
- T : Hash + Eq + Display ,
96
+ T : Hash + Eq + AsRef < [ u8 ] > ,
97
+ D : ConsumeHunk ,
63
98
{
64
- /// Create a new `UnifiedDiffBuilder` for the given `input`,
65
- /// displaying `context_size` lines of context around each change,
66
- /// that will return a [`String`].
67
- pub fn new ( input : & ' a InternedInput < T > , context_size : ContextSize ) -> Self {
99
+ /// Create a new instance to create unified diff using the lines in `input`,
100
+ /// which also must be used when running the diff algorithm.
101
+ /// `context_size` is the amount of lines around each hunk which will be passed
102
+ ///to `consume_hunk`.
103
+ ///
104
+ /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`,
105
+ pub fn new (
106
+ input : & ' a InternedInput < T > ,
107
+ consume_hunk : D ,
108
+ newline_separator : & ' a str ,
109
+ context_size : ContextSize ,
110
+ ) -> Self {
68
111
Self {
69
112
before_hunk_start : 0 ,
70
113
after_hunk_start : 0 ,
71
114
before_hunk_len : 0 ,
72
115
after_hunk_len : 0 ,
73
- buffer : String :: with_capacity ( 8 ) ,
74
- dst : String :: new ( ) ,
116
+ buffer : Vec :: with_capacity ( 8 ) ,
117
+ header_buf : String :: new ( ) ,
118
+ delegate : consume_hunk,
75
119
interner : & input. interner ,
76
120
before : & input. before ,
77
121
after : & input. after ,
78
122
pos : 0 ,
79
123
ctx_size : context_size. symmetrical ,
80
- }
81
- }
82
- }
124
+ newline : newline_separator,
83
125
84
- impl < ' a , W , T > UnifiedDiff < ' a , W , T >
85
- where
86
- W : Write ,
87
- T : Hash + Eq + Display ,
88
- {
89
- /// Create a new `UnifiedDiffBuilder` for the given `input`,
90
- /// displaying `context_size` lines of context around each change,
91
- /// that will writes it output to the provided implementation of [`Write`].
92
- pub fn with_writer ( input : & ' a InternedInput < T > , writer : W , context_size : Option < u32 > ) -> Self {
93
- Self {
94
- before_hunk_start : 0 ,
95
- after_hunk_start : 0 ,
96
- before_hunk_len : 0 ,
97
- after_hunk_len : 0 ,
98
- buffer : String :: with_capacity ( 8 ) ,
99
- dst : writer,
100
- interner : & input. interner ,
101
- before : & input. before ,
102
- after : & input. after ,
103
- pos : 0 ,
104
- ctx_size : context_size. unwrap_or ( 3 ) ,
126
+ err : None ,
105
127
}
106
128
}
107
129
108
130
fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
109
131
for & token in tokens {
110
- writeln ! ( & mut self . buffer, "{prefix}{}" , self . interner[ token] ) . unwrap ( ) ;
132
+ self . buffer . push_char ( prefix) ;
133
+ self . buffer . push_str ( & self . interner [ token] ) ;
134
+ self . buffer . push_str ( self . newline . as_bytes ( ) ) ;
111
135
}
112
136
}
113
137
114
- fn flush ( & mut self ) {
138
+ fn flush ( & mut self ) -> std :: io :: Result < ( ) > {
115
139
if self . before_hunk_len == 0 && self . after_hunk_len == 0 {
116
- return ;
140
+ return Ok ( ( ) ) ;
117
141
}
118
142
119
143
let end = ( self . pos + self . ctx_size ) . min ( self . before . len ( ) as u32 ) ;
120
144
self . update_pos ( end, end) ;
121
145
122
- writeln ! (
123
- & mut self . dst,
124
- "@@ -{},{} +{},{} @@" ,
146
+ self . header_buf . clear ( ) ;
147
+
148
+ std:: fmt:: Write :: write_fmt (
149
+ & mut self . header_buf ,
150
+ format_args ! (
151
+ "@@ -{},{} +{},{} @@{nl}" ,
152
+ self . before_hunk_start + 1 ,
153
+ self . before_hunk_len,
154
+ self . after_hunk_start + 1 ,
155
+ self . after_hunk_len,
156
+ nl = self . newline
157
+ ) ,
158
+ )
159
+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
160
+ self . delegate . consume_hunk (
125
161
self . before_hunk_start + 1 ,
126
162
self . before_hunk_len ,
127
163
self . after_hunk_start + 1 ,
128
164
self . after_hunk_len ,
129
- )
130
- . unwrap ( ) ;
131
- write ! ( & mut self . dst , "{}" , & self . buffer ) . unwrap ( ) ;
165
+ & self . header_buf ,
166
+ & self . buffer ,
167
+ ) ? ;
132
168
self . buffer . clear ( ) ;
133
169
self . before_hunk_len = 0 ;
134
- self . after_hunk_len = 0
170
+ self . after_hunk_len = 0 ;
171
+ Ok ( ( ) )
135
172
}
136
173
137
174
fn update_pos ( & mut self , print_to : u32 , move_to : u32 ) {
@@ -143,18 +180,24 @@ pub(super) mod _impl {
143
180
}
144
181
}
145
182
146
- impl < W , T > Sink for UnifiedDiff < ' _ , W , T >
183
+ impl < T , D > Sink for UnifiedDiff < ' _ , T , D >
147
184
where
148
- W : Write ,
149
- T : Hash + Eq + Display ,
185
+ T : Hash + Eq + AsRef < [ u8 ] > ,
186
+ D : ConsumeHunk ,
150
187
{
151
- type Out = W ;
188
+ type Out = std :: io :: Result < D :: Out > ;
152
189
153
190
fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
191
+ if self . err . is_some ( ) {
192
+ return ;
193
+ }
154
194
if ( ( self . pos == 0 ) && ( before. start - self . pos > self . ctx_size ) )
155
195
|| ( before. start - self . pos > 2 * self . ctx_size )
156
196
{
157
- self . flush ( ) ;
197
+ if let Err ( err) = self . flush ( ) {
198
+ self . err = Some ( err) ;
199
+ return ;
200
+ }
158
201
self . pos = before. start - self . ctx_size ;
159
202
self . before_hunk_start = self . pos ;
160
203
self . after_hunk_start = after. start - self . ctx_size ;
@@ -167,8 +210,46 @@ pub(super) mod _impl {
167
210
}
168
211
169
212
/// Flush the last pending hunk and turn the delegate into its output.
///
/// # Errors
///
/// If an earlier flush already failed, that first error is returned as is and the delegate
/// is not touched again: a failed flush leaves its hunk pending, so flushing once more would
/// call `consume_hunk` after it returned an error and could replace the original error
/// with a later one. Otherwise, the error of the final flush is returned, if there is one.
fn finish(mut self) -> Self::Out {
    if let Some(err) = self.err.take() {
        return Err(err);
    }
    self.flush()?;
    Ok(self.delegate.finish())
}
221
+ }
222
+
223
+ /// An implementation that fails if the input isn't UTF-8.
224
+ impl ConsumeHunk for String {
225
+ type Out = Self ;
226
+
227
+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
228
+ self . push_str ( header) ;
229
+ self . push_str (
230
+ hunk. to_str ( )
231
+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?,
232
+ ) ;
233
+ Ok ( ( ) )
234
+ }
235
+
236
+ fn finish ( self ) -> Self :: Out {
237
+ self
238
+ }
239
+ }
240
+
241
+ /// An implementation that writes hunks into a byte buffer.
242
+ impl ConsumeHunk for Vec < u8 > {
243
+ type Out = Self ;
244
+
245
+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
246
+ self . push_str ( header) ;
247
+ self . push_str ( hunk) ;
248
+ Ok ( ( ) )
249
+ }
250
+
251
+ fn finish ( self ) -> Self :: Out {
252
+ self
172
253
}
173
254
}
174
255
}
0 commit comments