1+ //! Git diff parsing utilities.
2+
3+ use anyhow:: Result ;
4+
/// A single file's worth of changes extracted from a git diff.
///
/// Values are produced by [`parse_diff`]. Equality compares all three fields, which makes
/// parsed results directly comparable in tests and by callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// Path of the changed file as extracted from the `diff --git` header, with the git side
    /// prefix removed. `"unknown"` when the input could not be recognised as a git diff.
    pub path: String,
    /// Kind of change. The parser emits one of `"modified"` (the default), `"added"`,
    /// `"deleted"`, `"renamed"` or `"binary"`.
    pub operation: String,
    /// Diff text collected for this file; for recognised diffs this starts with the file's
    /// `diff --git` header line.
    pub diff_content: String,
}
12+
/// Extracts the file path from the tokens of a `diff --git` header line.
///
/// Exactly one leading git side prefix is removed from each path: the default `a/` / `b/`
/// pair, or one of the mnemonic prefixes `c/`, `i/`, `o/`, `w/` that git emits when
/// `diff.mnemonicPrefix` is enabled. Only a single prefix is stripped so that real
/// directories which happen to be named like a prefix survive (`b/b/foo.rs` yields
/// `b/foo.rs`, not `foo.rs`).
///
/// The new-side path (fourth token) is preferred; the old-side path (third token) is used
/// when the new side is `/dev/null`, i.e. the file was deleted.
///
/// # Arguments
/// * `parts` - The whitespace-split tokens of a `diff --git` line
///   (`["diff", "--git", "<old>", "<new>", ..]`)
///
/// # Returns
/// * `Option<String>` - The extracted path without its side prefix, or `None` when fewer
///   than four tokens are supplied
///
/// # Limitations
/// Because the caller splits on whitespace, paths containing spaces are not reconstructed.
fn extract_file_path_from_diff_parts(parts: &[&str]) -> Option<String> {
    /// Side prefixes git may place in front of a path in a diff header.
    const SIDE_PREFIXES: [&str; 6] = ["a/", "b/", "c/", "i/", "o/", "w/"];

    /// Removes at most one side prefix; a path without a known prefix is returned unchanged.
    fn strip_side_prefix(path: &str) -> &str {
        SIDE_PREFIXES
            .iter()
            .find_map(|&prefix| path.strip_prefix(prefix))
            .unwrap_or(path)
    }

    let (old_raw, new_raw) = match parts {
        [_, _, old, new, ..] => (*old, *new),
        _ => return None,
    };

    let new_path = strip_side_prefix(new_raw);

    // Prefer the new path unless it is /dev/null (deleted file).
    let chosen = if new_path == "/dev/null" || new_path == "dev/null" {
        strip_side_prefix(old_raw)
    } else {
        new_path
    };

    Some(chosen.to_string())
}
46+
47+ /// Parse git diff into individual file changes.
48+ ///
49+ /// Handles various diff formats including:
50+ /// - Standard git diff output
51+ /// - Diffs with commit hashes
52+ /// - Diffs with various path prefixes (a/, b/, c/, i/)
53+ /// - Deleted files (/dev/null paths)
54+ ///
55+ /// # Arguments
56+ /// * `diff_content` - Raw git diff text
57+ ///
58+ /// # Returns
59+ /// * `Result<Vec<ParsedFile>>` - Parsed files or error
60+ pub fn parse_diff ( diff_content : & str ) -> Result < Vec < ParsedFile > > {
61+ let mut files = Vec :: new ( ) ;
62+ let mut current_file: Option < ParsedFile > = None ;
63+ let mut current_diff = String :: new ( ) ;
64+
65+ // Debug output
66+ log:: debug!( "Parsing diff with {} lines" , diff_content. lines( ) . count( ) ) ;
67+
68+ // Add more detailed logging for debugging
69+ if log:: log_enabled!( log:: Level :: Debug ) && !diff_content. is_empty ( ) {
70+ // Make sure we truncate at a valid UTF-8 character boundary
71+ let preview = if diff_content. len ( ) > 500 {
72+ let truncated_index = diff_content
73+ . char_indices ( )
74+ . take_while ( |( i, _) | * i < 500 )
75+ . last ( )
76+ . map ( |( i, c) | i + c. len_utf8 ( ) )
77+ . unwrap_or ( 0 ) ;
78+
79+ format ! ( "{}... (truncated)" , & diff_content[ ..truncated_index] )
80+ } else {
81+ diff_content. to_string ( )
82+ } ;
83+ log:: debug!( "Diff content preview: \n {preview}" ) ;
84+ }
85+
86+ // Handle different diff formats
87+ let mut in_diff_section = false ;
88+ let mut _commit_hash_line: Option < & str > = None ;
89+
90+ // First scan to detect if this is a commit message with hash
91+ for line in diff_content. lines ( ) . take ( 3 ) {
92+ if line. len ( ) >= 40 && line. chars ( ) . take ( 40 ) . all ( |c| c. is_ascii_hexdigit ( ) ) {
93+ _commit_hash_line = Some ( line) ;
94+ break ;
95+ }
96+ }
97+
98+ // Process line by line
99+ for line in diff_content. lines ( ) {
100+ // Skip commit hash lines and other metadata
101+ if line. starts_with ( "commit " ) || ( line. len ( ) >= 40 && line. chars ( ) . take ( 40 ) . all ( |c| c. is_ascii_hexdigit ( ) ) ) || line. is_empty ( ) {
102+ continue ;
103+ }
104+
105+ // Check if we're starting a new file diff
106+ if line. starts_with ( "diff --git" ) {
107+ in_diff_section = true ;
108+ // Save previous file if exists
109+ if let Some ( mut file) = current_file. take ( ) {
110+ file. diff_content = current_diff. clone ( ) ;
111+ log:: debug!( "Adding file to results: {} ({})" , file. path, file. operation) ;
112+ files. push ( file) ;
113+ current_diff. clear ( ) ;
114+ }
115+
116+ // Extract file path more carefully
117+ let parts: Vec < & str > = line. split_whitespace ( ) . collect ( ) ;
118+ if let Some ( path) = extract_file_path_from_diff_parts ( & parts) {
119+ log:: debug!( "Found new file in diff: {path}" ) ;
120+ current_file = Some ( ParsedFile {
121+ path,
122+ operation : "modified" . to_string ( ) , // Default, will be updated
123+ diff_content : String :: new ( )
124+ } ) ;
125+ }
126+
127+ // Add the header line to the diff content
128+ current_diff. push_str ( line) ;
129+ current_diff. push ( '\n' ) ;
130+ } else if line. starts_with ( "new file mode" ) {
131+ if let Some ( ref mut file) = current_file {
132+ log:: debug!( "File {} is newly added" , file. path) ;
133+ file. operation = "added" . to_string ( ) ;
134+ }
135+ current_diff. push_str ( line) ;
136+ current_diff. push ( '\n' ) ;
137+ } else if line. starts_with ( "deleted file mode" ) {
138+ if let Some ( ref mut file) = current_file {
139+ log:: debug!( "File {} is deleted" , file. path) ;
140+ file. operation = "deleted" . to_string ( ) ;
141+ }
142+ current_diff. push_str ( line) ;
143+ current_diff. push ( '\n' ) ;
144+ } else if line. starts_with ( "rename from" ) || line. starts_with ( "rename to" ) {
145+ if let Some ( ref mut file) = current_file {
146+ log:: debug!( "File {} is renamed" , file. path) ;
147+ file. operation = "renamed" . to_string ( ) ;
148+ }
149+ current_diff. push_str ( line) ;
150+ current_diff. push ( '\n' ) ;
151+ } else if line. starts_with ( "Binary files" ) {
152+ if let Some ( ref mut file) = current_file {
153+ log:: debug!( "File {} is binary" , file. path) ;
154+ file. operation = "binary" . to_string ( ) ;
155+ }
156+ current_diff. push_str ( line) ;
157+ current_diff. push ( '\n' ) ;
158+ } else if line. starts_with ( "index " ) || line. starts_with ( "--- " ) || line. starts_with ( "+++ " ) || line. starts_with ( "@@ " ) {
159+ // These are important diff headers that should be included
160+ current_diff. push_str ( line) ;
161+ current_diff. push ( '\n' ) ;
162+ } else if in_diff_section {
163+ current_diff. push_str ( line) ;
164+ current_diff. push ( '\n' ) ;
165+ }
166+ }
167+
168+ // Don't forget the last file
169+ if let Some ( mut file) = current_file {
170+ file. diff_content = current_diff;
171+ log:: debug!( "Adding final file to results: {} ({})" , file. path, file. operation) ;
172+ files. push ( file) ;
173+ }
174+
175+ // If we didn't parse any files, check if this looks like a raw git diff output
176+ // from commands like `git show` that include commit info at the top
177+ if files. is_empty ( ) && !diff_content. trim ( ) . is_empty ( ) {
178+ log:: debug!( "Trying to parse as raw git diff output with commit info" ) ;
179+
180+ // Extract sections that start with "diff --git"
181+ let sections: Vec < & str > = diff_content. split ( "diff --git" ) . skip ( 1 ) . collect ( ) ;
182+
183+ if !sections. is_empty ( ) {
184+ for ( i, section) in sections. iter ( ) . enumerate ( ) {
185+ // Add the "diff --git" prefix back
186+ let full_section = format ! ( "diff --git{section}" ) ;
187+
188+ // Extract file path from the section more carefully
189+ let mut found_path = false ;
190+
191+ // Safer approach: iterate through lines and find the path
192+ let mut extracted_path = String :: new ( ) ;
193+ for section_line in full_section. lines ( ) . take ( 3 ) {
194+ if section_line. starts_with ( "diff --git" ) {
195+ let parts: Vec < & str > = section_line. split_whitespace ( ) . collect ( ) ;
196+ if let Some ( p) = extract_file_path_from_diff_parts ( & parts) {
197+ extracted_path = p;
198+ found_path = true ;
199+ break ;
200+ }
201+ }
202+ }
203+
204+ if found_path {
205+ log:: debug!( "Found file in section {i}: {extracted_path}" ) ;
206+ files. push ( ParsedFile {
207+ path : extracted_path,
208+ operation : "modified" . to_string ( ) , // Default
209+ diff_content : full_section
210+ } ) ;
211+ }
212+ }
213+ }
214+ }
215+
216+ // If still no files were parsed, treat the entire diff as a single change
217+ if files. is_empty ( ) && !diff_content. trim ( ) . is_empty ( ) {
218+ log:: debug!( "No standard diff format found, treating as single file change" ) ;
219+ files. push ( ParsedFile {
220+ path : "unknown" . to_string ( ) ,
221+ operation : "modified" . to_string ( ) ,
222+ diff_content : diff_content. to_string ( )
223+ } ) ;
224+ }
225+
226+ log:: debug!( "Parsed {} files from diff" , files. len( ) ) ;
227+
228+ // Add detailed debug output for each parsed file
229+ if log:: log_enabled!( log:: Level :: Debug ) {
230+ for ( i, file) in files. iter ( ) . enumerate ( ) {
231+ let content_preview = if file. diff_content . len ( ) > 200 {
232+ // Make sure we truncate at a valid UTF-8 character boundary
233+ let truncated_index = file
234+ . diff_content
235+ . char_indices ( )
236+ . take_while ( |( i, _) | * i < 200 )
237+ . last ( )
238+ . map ( |( i, c) | i + c. len_utf8 ( ) )
239+ . unwrap_or ( 0 ) ;
240+
241+ format ! ( "{}... (truncated)" , & file. diff_content[ ..truncated_index] )
242+ } else {
243+ file. diff_content . clone ( )
244+ } ;
245+ log:: debug!( "File {}: {} ({})\n Content preview:\n {}" , i, file. path, file. operation, content_preview) ;
246+ }
247+ }
248+
249+ Ok ( files)
250+ }
251+
/// Unit tests for [`parse_diff`].
///
/// The raw-string fixtures are deliberately written at column 0: `parse_diff` only
/// recognises `diff --git` headers that start a line, so indenting them would change what
/// is being tested.
#[cfg(test)]
mod tests {
    use super::*;

    /// Two files in one standard diff: a modification followed by an addition.
    #[test]
    fn test_parse_diff() {
        let diff = r#"diff --git a/src/main.rs b/src/main.rs
index 1234567..abcdefg 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,6 @@
 fn main() {
-    println!("Hello");
+    println!("Hello, world!");
+    println!("New line");
 }
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "test"
+version = "0.1.0"
"#;

        let files = parse_diff(diff).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].operation, "modified");
        assert_eq!(files[1].path, "Cargo.toml");
        assert_eq!(files[1].operation, "added");

        // Verify files contain diff content
        assert!(!files[0].diff_content.is_empty());
        assert!(!files[1].diff_content.is_empty());
    }

    /// A commit hash and message before the diff must not confuse the parser.
    #[test]
    fn test_parse_diff_with_commit_hash() {
        let diff = r#"0472ffa1665c4c5573fb8f7698c9965122eda675 Update files

diff --git a/test.js b/test.js
new file mode 100644
index 0000000..a730e61
--- /dev/null
+++ b/test.js
@@ -0,0 +1 @@
+console.log('Hello');
"#;

        let files = parse_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "test.js");
        assert_eq!(files[0].operation, "added");
    }

    /// `c/` and `i/` prefixes appear in git hook diffs and must be stripped from paths.
    #[test]
    fn test_parse_diff_with_c_i_prefixes() {
        let diff = r#"diff --git c/test.md i/test.md
new file mode 100644
index 0000000..6c61a60
--- /dev/null
+++ i/test.md
@@ -0,0 +1 @@
+# Test File

diff --git c/test.js i/test.js
new file mode 100644
index 0000000..a730e61
--- /dev/null
+++ i/test.js
@@ -0,0 +1 @@
+console.log('Hello');
"#;

        let files = parse_diff(diff).unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].path, "test.md", "Should extract clean path without c/ prefix");
        assert_eq!(files[0].operation, "added");
        assert_eq!(files[1].path, "test.js", "Should extract clean path without i/ prefix");
        assert_eq!(files[1].operation, "added");

        // Verify files contain diff content
        assert!(files[0].diff_content.contains("# Test File"));
        assert!(files[1].diff_content.contains("console.log"));
    }

    /// A deleted file is reported under its path with the `deleted` operation.
    #[test]
    fn test_parse_diff_with_deleted_file() {
        let diff = r#"diff --git a/test.txt b/test.txt
deleted file mode 100644
index 9daeafb..0000000
--- a/test.txt
+++ /dev/null
@@ -1 +0,0 @@
-test
"#;

        let files = parse_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "test.txt");
        assert_eq!(files[0].operation, "deleted");
    }

    /// `rename from` / `rename to` lines mark the file as renamed, reported at the new path.
    #[test]
    fn test_parse_diff_with_renamed_file() {
        let diff = r#"diff --git a/old.rs b/new.rs
similarity index 100%
rename from old.rs
rename to new.rs
"#;

        let files = parse_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "new.rs");
        assert_eq!(files[0].operation, "renamed");
        assert!(files[0].diff_content.contains("rename to new.rs"));
    }

    /// A `Binary files ... differ` line marks the file as binary.
    #[test]
    fn test_parse_diff_with_binary_file() {
        let diff = r#"diff --git a/logo.png b/logo.png
index 1234567..89abcde 100644
Binary files a/logo.png and b/logo.png differ
"#;

        let files = parse_diff(diff).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "logo.png");
        assert_eq!(files[0].operation, "binary");
    }

    /// Empty or whitespace-only input produces no files rather than an `unknown` entry.
    #[test]
    fn test_parse_diff_with_blank_input() {
        assert!(parse_diff("").unwrap().is_empty());
        assert!(parse_diff("  \n\n").unwrap().is_empty());
    }

    /// Text without any `diff --git` header is returned whole as a single unknown change.
    #[test]
    fn test_parse_diff_without_headers_falls_back_to_unknown() {
        let text = "not a diff\njust notes\n";

        let files = parse_diff(text).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "unknown");
        assert_eq!(files[0].operation, "modified");
        assert_eq!(files[0].diff_content, text);
    }
}
0 commit comments