1- //! Originally based on https://github.com/pascalkuthe/imara- diff/pull/14 .
1+ //! Facilities to produce the unified diff format.
22//!
3+ //! Originally based on <https://github.com/pascalkuthe/imara-diff/pull/14>.
34
45/// Defines the size of the context printed before and after each change.
56///
@@ -25,21 +26,50 @@ impl ContextSize {
2526 }
2627}
2728
29+ /// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
30+ pub trait ConsumeHunk {
31+ /// The item this instance produces after consuming all hunks.
32+ type Out ;
33+
34+ /// Consume a single `hunk` in unified diff format, that would be prefixed with `header`.
35+ /// Note that all newlines are already included in `header` and `hunk`.
36+ ///
37+ /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`].
38+ /// After this method returned its first error, it will not be called anymore.
39+ ///
40+ /// The following is hunk-related information and the same that is used in the `header`.
41+ /// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
42+ /// * `before_hunk_len` is the number of lines of this hunk in the old file.
43+ /// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
44+ /// * `after_hunk_len` is the number of lines of this hunk in the new file.
45+ fn consume_hunk (
46+ & mut self ,
47+ before_hunk_start : u32 ,
48+ before_hunk_len : u32 ,
49+ after_hunk_start : u32 ,
50+ after_hunk_len : u32 ,
51+ header : & str ,
52+ hunk : & [ u8 ] ,
53+ ) -> std:: io:: Result < ( ) > ;
54+ /// Called after the last hunk is consumed to produce an output.
55+ fn finish ( self ) -> Self :: Out ;
56+ }
57+
2858pub ( super ) mod _impl {
59+ use super :: { ConsumeHunk , ContextSize } ;
60+ use bstr:: { ByteSlice , ByteVec } ;
2961 use imara_diff:: { intern, Sink } ;
30- use std :: fmt :: { Display , Write } ;
62+ use intern :: { InternedInput , Interner , Token } ;
3163 use std:: hash:: Hash ;
64+ use std:: io:: ErrorKind ;
3265 use std:: ops:: Range ;
3366
34- use super :: ContextSize ;
35- use intern:: { InternedInput , Interner , Token } ;
36-
37- /// A [`Sink`] that creates a textual diff
38- /// in the format typically output by git or gnu-diff if the `-u` option is used
39- pub struct UnifiedDiff < ' a , W , T >
67+ /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
68+ /// and passes it in full to a consumer.
69+ pub struct UnifiedDiff < ' a , T , D >
4070 where
41- W : Write ,
42- T : Hash + Eq + Display ,
71+ T : Hash + Eq + AsRef < [ u8 ] > ,
72+ D : ConsumeHunk ,
4373 {
4474 before : & ' a [ Token ] ,
4575 after : & ' a [ Token ] ,
@@ -53,85 +83,92 @@ pub(super) mod _impl {
5383 /// Symmetrical context before and after the changed hunk.
5484 ctx_size : u32 ,
5585
56- buffer : String ,
57- dst : W ,
86+ buffer : Vec < u8 > ,
87+ header_buf : String ,
88+ delegate : D ,
89+ newline : & ' a str ,
90+
91+ err : Option < std:: io:: Error > ,
5892 }
5993
60- impl < ' a , T > UnifiedDiff < ' a , String , T >
94+ impl < ' a , T , D > UnifiedDiff < ' a , T , D >
6195 where
62- T : Hash + Eq + Display ,
96+ T : Hash + Eq + AsRef < [ u8 ] > ,
97+ D : ConsumeHunk ,
6398 {
64- /// Create a new `UnifiedDiffBuilder` for the given `input`,
65- /// displaying `context_size` lines of context around each change,
66- /// that will return a [`String`].
67- pub fn new ( input : & ' a InternedInput < T > , context_size : ContextSize ) -> Self {
99+ /// Create a new instance to create a unified diff using the lines in `input`,
100+ /// which also must be used when running the diff algorithm.
101+ /// `context_size` is the amount of lines around each hunk which will be passed
102+ /// to `consume_hunk`.
103+ ///
104+ /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
105+ pub fn new (
106+ input : & ' a InternedInput < T > ,
107+ consume_hunk : D ,
108+ newline_separator : & ' a str ,
109+ context_size : ContextSize ,
110+ ) -> Self {
68111 Self {
69112 before_hunk_start : 0 ,
70113 after_hunk_start : 0 ,
71114 before_hunk_len : 0 ,
72115 after_hunk_len : 0 ,
73- buffer : String :: with_capacity ( 8 ) ,
74- dst : String :: new ( ) ,
116+ buffer : Vec :: with_capacity ( 8 ) ,
117+ header_buf : String :: new ( ) ,
118+ delegate : consume_hunk,
75119 interner : & input. interner ,
76120 before : & input. before ,
77121 after : & input. after ,
78122 pos : 0 ,
79123 ctx_size : context_size. symmetrical ,
80- }
81- }
82- }
124+ newline : newline_separator,
83125
84- impl < ' a , W , T > UnifiedDiff < ' a , W , T >
85- where
86- W : Write ,
87- T : Hash + Eq + Display ,
88- {
89- /// Create a new `UnifiedDiffBuilder` for the given `input`,
90- /// displaying `context_size` lines of context around each change,
91- /// that will writes it output to the provided implementation of [`Write`].
92- pub fn with_writer ( input : & ' a InternedInput < T > , writer : W , context_size : Option < u32 > ) -> Self {
93- Self {
94- before_hunk_start : 0 ,
95- after_hunk_start : 0 ,
96- before_hunk_len : 0 ,
97- after_hunk_len : 0 ,
98- buffer : String :: with_capacity ( 8 ) ,
99- dst : writer,
100- interner : & input. interner ,
101- before : & input. before ,
102- after : & input. after ,
103- pos : 0 ,
104- ctx_size : context_size. unwrap_or ( 3 ) ,
126+ err : None ,
105127 }
106128 }
107129
108130 fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
109131 for & token in tokens {
110- writeln ! ( & mut self . buffer, "{prefix}{}" , self . interner[ token] ) . unwrap ( ) ;
132+ self . buffer . push_char ( prefix) ;
133+ self . buffer . push_str ( & self . interner [ token] ) ;
134+ self . buffer . push_str ( self . newline . as_bytes ( ) ) ;
111135 }
112136 }
113137
114- fn flush ( & mut self ) {
138+ fn flush ( & mut self ) -> std :: io :: Result < ( ) > {
115139 if self . before_hunk_len == 0 && self . after_hunk_len == 0 {
116- return ;
140+ return Ok ( ( ) ) ;
117141 }
118142
119143 let end = ( self . pos + self . ctx_size ) . min ( self . before . len ( ) as u32 ) ;
120144 self . update_pos ( end, end) ;
121145
122- writeln ! (
123- & mut self . dst,
124- "@@ -{},{} +{},{} @@" ,
146+ self . header_buf . clear ( ) ;
147+
148+ std:: fmt:: Write :: write_fmt (
149+ & mut self . header_buf ,
150+ format_args ! (
151+ "@@ -{},{} +{},{} @@{nl}" ,
152+ self . before_hunk_start + 1 ,
153+ self . before_hunk_len,
154+ self . after_hunk_start + 1 ,
155+ self . after_hunk_len,
156+ nl = self . newline
157+ ) ,
158+ )
159+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
160+ self . delegate . consume_hunk (
125161 self . before_hunk_start + 1 ,
126162 self . before_hunk_len ,
127163 self . after_hunk_start + 1 ,
128164 self . after_hunk_len ,
129- )
130- . unwrap ( ) ;
131- write ! ( & mut self . dst , "{}" , & self . buffer ) . unwrap ( ) ;
165+ & self . header_buf ,
166+ & self . buffer ,
167+ ) ? ;
132168 self . buffer . clear ( ) ;
133169 self . before_hunk_len = 0 ;
134- self . after_hunk_len = 0
170+ self . after_hunk_len = 0 ;
171+ Ok ( ( ) )
135172 }
136173
137174 fn update_pos ( & mut self , print_to : u32 , move_to : u32 ) {
@@ -143,18 +180,24 @@ pub(super) mod _impl {
143180 }
144181 }
145182
146- impl < W , T > Sink for UnifiedDiff < ' _ , W , T >
183+ impl < T , D > Sink for UnifiedDiff < ' _ , T , D >
147184 where
148- W : Write ,
149- T : Hash + Eq + Display ,
185+ T : Hash + Eq + AsRef < [ u8 ] > ,
186+ D : ConsumeHunk ,
150187 {
151- type Out = W ;
188+ type Out = std :: io :: Result < D :: Out > ;
152189
153190 fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
191+ if self . err . is_some ( ) {
192+ return ;
193+ }
154194 if ( ( self . pos == 0 ) && ( before. start - self . pos > self . ctx_size ) )
155195 || ( before. start - self . pos > 2 * self . ctx_size )
156196 {
157- self . flush ( ) ;
197+ if let Err ( err) = self . flush ( ) {
198+ self . err = Some ( err) ;
199+ return ;
200+ }
158201 self . pos = before. start - self . ctx_size ;
159202 self . before_hunk_start = self . pos ;
160203 self . after_hunk_start = after. start - self . ctx_size ;
@@ -167,8 +210,46 @@ pub(super) mod _impl {
167210 }
168211
169212 fn finish ( mut self ) -> Self :: Out {
170- self . flush ( ) ;
171- self . dst
213+ if let Err ( err) = self . flush ( ) {
214+ self . err = Some ( err) ;
215+ }
216+ if let Some ( err) = self . err {
217+ return Err ( err) ;
218+ }
219+ Ok ( self . delegate . finish ( ) )
220+ }
221+ }
222+
223+ /// An implementation that fails if the input isn't UTF-8.
224+ impl ConsumeHunk for String {
225+ type Out = Self ;
226+
227+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
228+ self . push_str ( header) ;
229+ self . push_str (
230+ hunk. to_str ( )
231+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?,
232+ ) ;
233+ Ok ( ( ) )
234+ }
235+
236+ fn finish ( self ) -> Self :: Out {
237+ self
238+ }
239+ }
240+
241+ /// An implementation that writes hunks into a byte buffer.
242+ impl ConsumeHunk for Vec < u8 > {
243+ type Out = Self ;
244+
245+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
246+ self . push_str ( header) ;
247+ self . push_str ( hunk) ;
248+ Ok ( ( ) )
249+ }
250+
251+ fn finish ( self ) -> Self :: Out {
252+ self
172253 }
173254 }
174255}
0 commit comments