@@ -26,6 +26,35 @@ impl ContextSize {
2626 }
2727}
2828
/// Represents the type of a line in a unified diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffLineType {
    /// A line that exists in both old and new versions (context line).
    Context,
    /// A line that was added in the new version.
    Add,
    /// A line that was removed from the old version.
    Remove,
}

impl DiffLineType {
    /// Return the marker that precedes a line of this type in unified-diff output as a `char`.
    ///
    /// This is derived from `to_byte_prefix()` so that the two representations cannot drift apart.
    const fn to_prefix(self) -> char {
        // Lossless: every prefix is a single ASCII byte.
        self.to_byte_prefix() as char
    }

    /// Return the marker that precedes a line of this type in unified-diff output as a byte:
    /// `b' '` for context lines, `b'+'` for added lines and `b'-'` for removed lines.
    const fn to_byte_prefix(self) -> u8 {
        match self {
            DiffLineType::Context => b' ',
            DiffLineType::Add => b'+',
            DiffLineType::Remove => b'-',
        }
    }
}
57+
2958/// Specify where to put a newline.
3059#[ derive( Debug , Copy , Clone ) ]
3160pub enum NewlineSeparator < ' a > {
@@ -39,6 +68,31 @@ pub enum NewlineSeparator<'a> {
3968 AfterHeaderAndWhenNeeded ( & ' a str ) ,
4069}
4170
71+ /// A utility trait for use in [`UnifiedDiffSink`](super::UnifiedDiffSink).
72+ pub trait ConsumeTypedHunk {
73+ /// TODO:
74+ /// Document.
75+ type Out ;
76+
77+ /// TODO:
78+ /// Document.
79+ /// How do we want to pass the header to `consume_hunk`? We can add an additional parameter
80+ /// similar to `ConsumeHunk::consume_hunk` or add `DiffLineType::Header` in which case we
81+ /// didn’t have to add an additional parameter.
82+ fn consume_hunk (
83+ & mut self ,
84+ before_hunk_start : u32 ,
85+ before_hunk_len : u32 ,
86+ after_hunk_start : u32 ,
87+ after_hunk_len : u32 ,
88+ header : & str ,
89+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
90+ ) -> std:: io:: Result < ( ) > ;
91+
92+ /// Called when processing is complete.
93+ fn finish ( self ) -> Self :: Out ;
94+ }
95+
4296/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
4397pub trait ConsumeHunk {
4498 /// The item this instance produces after consuming all hunks.
@@ -75,18 +129,13 @@ pub(super) mod _impl {
75129 use imara_diff:: { intern, Sink } ;
76130 use intern:: { InternedInput , Interner , Token } ;
77131
78- use super :: { ConsumeHunk , ContextSize , NewlineSeparator } ;
79-
80- const CONTEXT : char = ' ' ;
81- const ADDITION : char = '+' ;
82- const REMOVAL : char = '-' ;
132+ use super :: { ConsumeHunk , ConsumeTypedHunk , ContextSize , DiffLineType , NewlineSeparator } ;
83133
84- /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
85- /// and passes it in full to a consumer.
86- pub struct UnifiedDiff < ' a , T , D >
134+ /// A [`Sink`] that creates a unified diff and processes it hunk-by-hunk with structured type information.
135+ pub struct UnifiedDiffSink < ' a , T , D >
87136 where
88137 T : Hash + Eq + AsRef < [ u8 ] > ,
89- D : ConsumeHunk ,
138+ D : ConsumeTypedHunk ,
90139 {
91140 before : & ' a [ Token ] ,
92141 after : & ' a [ Token ] ,
@@ -106,26 +155,31 @@ pub(super) mod _impl {
106155
107156 /// Symmetrical context before and after the changed hunk.
108157 ctx_size : u32 ,
158+ // TODO:
159+ // Is there a way to remove `newline` from `UnifiedDiffSink` as it is purely
160+ // formatting-related?
161+ // One option would be to introduce `HunkHeader` with a method `format_header` that could
162+ // then be called outside `UnifiedDiffSink`, potentially taking `newline` as an argument.
109163 newline : NewlineSeparator < ' a > ,
110164
111- buffer : Vec < u8 > ,
165+ buffer : Vec < ( DiffLineType , Vec < u8 > ) > ,
112166 header_buf : String ,
113167 delegate : D ,
114168
115169 err : Option < std:: io:: Error > ,
116170 }
117171
118- impl < ' a , T , D > UnifiedDiff < ' a , T , D >
172+ impl < ' a , T , D > UnifiedDiffSink < ' a , T , D >
119173 where
120174 T : Hash + Eq + AsRef < [ u8 ] > ,
121- D : ConsumeHunk ,
175+ D : ConsumeTypedHunk ,
122176 {
123- /// Create a new instance to create unified diff using the lines in `input`,
177+ /// Create a new instance to create a unified diff using the lines in `input`,
124178 /// which also must be used when running the diff algorithm.
125179 /// `context_size` is the amount of lines around each hunk which will be passed
126- ///to `consume_hunk` .
180+ /// to the sink .
127181 ///
128- /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator` .
182+ /// The sink's `consume_hunk` method is called for each hunk with structured type information .
129183 pub fn new (
130184 input : & ' a InternedInput < T > ,
131185 consume_hunk : D ,
@@ -154,21 +208,10 @@ pub(super) mod _impl {
154208 }
155209 }
156210
157- fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
211+ fn print_tokens ( & mut self , tokens : & [ Token ] , line_type : DiffLineType ) {
158212 for & token in tokens {
159- self . buffer . push_char ( prefix) ;
160- let line = & self . interner [ token] ;
161- self . buffer . push_str ( line) ;
162- match self . newline {
163- NewlineSeparator :: AfterHeaderAndLine ( nl) => {
164- self . buffer . push_str ( nl) ;
165- }
166- NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
167- if !line. as_ref ( ) . ends_with_str ( nl) {
168- self . buffer . push_str ( nl) ;
169- }
170- }
171- }
213+ let content = self . interner [ token] . as_ref ( ) . to_vec ( ) ;
214+ self . buffer . push ( ( line_type, content) ) ;
172215 }
173216 }
174217
@@ -200,21 +243,36 @@ pub(super) mod _impl {
200243 ) ,
201244 )
202245 . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
246+
247+ // TODO:
248+ // Is this explicit conversion necessary?
249+ // Is the comment necessary?
250+ // Convert Vec<(DiffLineType, Vec<u8>)> to Vec<(DiffLineType, &[u8])>
251+ let lines: Vec < ( DiffLineType , & [ u8 ] ) > = self
252+ . buffer
253+ . iter ( )
254+ . map ( |( line_type, content) | ( * line_type, content. as_slice ( ) ) )
255+ . collect ( ) ;
256+
203257 self . delegate . consume_hunk (
204258 hunk_start,
205259 self . before_hunk_len ,
206260 hunk_end,
207261 self . after_hunk_len ,
208262 & self . header_buf ,
209- & self . buffer ,
263+ & lines ,
210264 ) ?;
211265
212266 self . reset_hunks ( ) ;
213267 Ok ( ( ) )
214268 }
215269
216270 fn print_context_and_update_pos ( & mut self , print : Range < u32 > , move_to : u32 ) {
217- self . print_tokens ( & self . before [ print. start as usize ..print. end as usize ] , CONTEXT ) ;
271+ self . print_tokens (
272+ & self . before [ print. start as usize ..print. end as usize ] ,
273+ DiffLineType :: Context ,
274+ ) ;
275+
218276 let len = print. end - print. start ;
219277 self . ctx_pos = Some ( move_to) ;
220278 self . before_hunk_len += len;
@@ -232,10 +290,10 @@ pub(super) mod _impl {
232290 }
233291 }
234292
235- impl < T , D > Sink for UnifiedDiff < ' _ , T , D >
293+ impl < T , D > Sink for UnifiedDiffSink < ' _ , T , D >
236294 where
237295 T : Hash + Eq + AsRef < [ u8 ] > ,
238- D : ConsumeHunk ,
296+ D : ConsumeTypedHunk ,
239297 {
240298 type Out = std:: io:: Result < D :: Out > ;
241299
@@ -270,8 +328,11 @@ pub(super) mod _impl {
270328 self . before_hunk_len += before. end - before. start ;
271329 self . after_hunk_len += after. end - after. start ;
272330
273- self . print_tokens ( & self . before [ before. start as usize ..before. end as usize ] , REMOVAL ) ;
274- self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , ADDITION ) ;
331+ self . print_tokens (
332+ & self . before [ before. start as usize ..before. end as usize ] ,
333+ DiffLineType :: Remove ,
334+ ) ;
335+ self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , DiffLineType :: Add ) ;
275336 }
276337
277338 fn finish ( mut self ) -> Self :: Out {
@@ -285,6 +346,95 @@ pub(super) mod _impl {
285346 }
286347 }
287348
349+ /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
350+ /// and passes it in full to a consumer.
351+ pub struct UnifiedDiff < ' a , D >
352+ where
353+ D : ConsumeHunk ,
354+ {
355+ delegate : D ,
356+ newline : NewlineSeparator < ' a > ,
357+ buffer : Vec < u8 > ,
358+ }
359+
360+ impl < ' a , D > UnifiedDiff < ' a , D >
361+ where
362+ D : ConsumeHunk ,
363+ {
364+ /// Create a new instance to create a unified diff using the lines in `input`,
365+ /// which also must be used when running the diff algorithm.
366+ /// `context_size` is the amount of lines around each hunk which will be passed
367+ /// to `consume_hunk`.
368+ ///
369+ /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
370+ pub fn new < T > (
371+ input : & ' a InternedInput < T > ,
372+ consume_hunk : D ,
373+ newline_separator : NewlineSeparator < ' a > ,
374+ context_size : ContextSize ,
375+ ) -> UnifiedDiffSink < ' a , T , Self >
376+ where
377+ T : Hash + Eq + AsRef < [ u8 ] > ,
378+ {
379+ let formatter = Self {
380+ delegate : consume_hunk,
381+ newline : newline_separator,
382+ buffer : Vec :: new ( ) ,
383+ } ;
384+ // TODO:
385+ // Should this return a `UnifiedDiff` instead of a `UnifiedDiffSink`?
386+ UnifiedDiffSink :: new ( input, formatter, newline_separator, context_size)
387+ }
388+
389+ fn format_line ( & mut self , line_type : DiffLineType , content : & [ u8 ] ) {
390+ self . buffer . push ( line_type. to_byte_prefix ( ) ) ;
391+ self . buffer . push_str ( content) ;
392+ match self . newline {
393+ NewlineSeparator :: AfterHeaderAndLine ( nl) => {
394+ self . buffer . push_str ( nl) ;
395+ }
396+ NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
397+ if !content. ends_with_str ( nl) {
398+ self . buffer . push_str ( nl) ;
399+ }
400+ }
401+ }
402+ }
403+ }
404+
405+ impl < D : ConsumeHunk > ConsumeTypedHunk for UnifiedDiff < ' _ , D > {
406+ type Out = D :: Out ;
407+
408+ fn consume_hunk (
409+ & mut self ,
410+ before_hunk_start : u32 ,
411+ before_hunk_len : u32 ,
412+ after_hunk_start : u32 ,
413+ after_hunk_len : u32 ,
414+ header : & str ,
415+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
416+ ) -> std:: io:: Result < ( ) > {
417+ self . buffer . clear ( ) ;
418+
419+ for & ( line_type, content) in lines {
420+ self . format_line ( line_type, content) ;
421+ }
422+
423+ self . delegate . consume_hunk (
424+ before_hunk_start,
425+ before_hunk_len,
426+ after_hunk_start,
427+ after_hunk_len,
428+ & header,
429+ & self . buffer ,
430+ )
431+ }
432+
433+ fn finish ( self ) -> Self :: Out {
434+ self . delegate . finish ( )
435+ }
436+ }
437+
288438 /// An implementation that fails if the input isn't UTF-8.
289439 impl ConsumeHunk for String {
290440 type Out = Self ;
@@ -317,4 +467,58 @@ pub(super) mod _impl {
317467 self
318468 }
319469 }
470+
471+ impl ConsumeTypedHunk for String {
472+ type Out = Self ;
473+
474+ fn consume_hunk (
475+ & mut self ,
476+ _: u32 ,
477+ _: u32 ,
478+ _: u32 ,
479+ _: u32 ,
480+ header : & str ,
481+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
482+ ) -> std:: io:: Result < ( ) > {
483+ self . push_str ( header) ;
484+ for & ( line_type, content) in lines {
485+ self . push ( line_type. to_prefix ( ) ) ;
486+ // TODO:
487+ // How does `impl ConsumeHunk for String` handle errors?
488+ self . push_str ( std:: str:: from_utf8 ( content) . map_err ( |e| std:: io:: Error :: new ( ErrorKind :: Other , e) ) ?) ;
489+ self . push ( '\n' ) ;
490+ }
491+ Ok ( ( ) )
492+ }
493+
494+ fn finish ( self ) -> Self :: Out {
495+ self
496+ }
497+ }
498+
499+ impl ConsumeTypedHunk for Vec < u8 > {
500+ type Out = Self ;
501+
502+ fn consume_hunk (
503+ & mut self ,
504+ _: u32 ,
505+ _: u32 ,
506+ _: u32 ,
507+ _: u32 ,
508+ header : & str ,
509+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
510+ ) -> std:: io:: Result < ( ) > {
511+ self . push_str ( header) ;
512+ for & ( line_type, content) in lines {
513+ self . push ( line_type. to_byte_prefix ( ) ) ;
514+ self . extend_from_slice ( content) ;
515+ self . push ( b'\n' ) ;
516+ }
517+ Ok ( ( ) )
518+ }
519+
520+ fn finish ( self ) -> Self :: Out {
521+ self
522+ }
523+ }
320524}
0 commit comments