Implement parallel git diff analysis algorithm

Copilot · oleander · Copilot · commit 1f7edd06628e · 2025-10-05T22:24:04.000Z
Co-authored-by: oleander <220827+oleander@users.noreply.github.com>
diff --git a/src/commit.rs b/src/commit.rs
@@ -6,7 +6,7 @@ use async_openai::Client;
 use crate::{config, debug_output, openai, profile};
 use crate::model::Model;
 use crate::config::AppConfig;
-use crate::multi_step_integration::{generate_commit_message_local, generate_commit_message_multi_step};
+use crate::multi_step_integration::{generate_commit_message_local, generate_commit_message_multi_step, generate_commit_message_parallel};
 
 /// The instruction template included at compile time
 const INSTRUCTION_TEMPLATE: &str = include_str!("../resources/prompt.md");
@@ -117,16 +117,25 @@ pub async fn generate(patch: String, remaining_tokens: usize, model: Model, sett
             let client = Client::with_config(config);
             let model_str = model.to_string();
 
-            match generate_commit_message_multi_step(&client, &model_str, &patch, max_length).await {
+            // Try parallel approach first
+            match generate_commit_message_parallel(&client, &model_str, &patch, max_length).await {
               Ok(message) => return Ok(openai::Response { response: message }),
               Err(e) => {
                 // Check if it's an API key error
                 if e.to_string().contains("invalid_api_key") || e.to_string().contains("Incorrect API key") {
                   bail!("Invalid OpenAI API key. Please check your API key configuration.");
                 }
-                log::warn!("Multi-step generation with custom settings failed: {e}");
-                if let Some(session) = debug_output::debug_session() {
-                  session.set_multi_step_error(e.to_string());
+                log::warn!("Parallel generation with custom settings failed, trying multi-step: {e}");
+                
+                // Fallback to old multi-step approach
+                match generate_commit_message_multi_step(&client, &model_str, &patch, max_length).await {
+                  Ok(message) => return Ok(openai::Response { response: message }),
+                  Err(e2) => {
+                    log::warn!("Multi-step generation with custom settings also failed: {e2}");
+                    if let Some(session) = debug_output::debug_session() {
+                      session.set_multi_step_error(e2.to_string());
+                    }
+                  }
                 }
               }
             }
@@ -145,16 +154,25 @@ pub async fn generate(patch: String, remaining_tokens: usize, model: Model, sett
         let client = Client::new();
         let model_str = model.to_string();
 
-        match generate_commit_message_multi_step(&client, &model_str, &patch, max_length).await {
+        // Try parallel approach first
+        match generate_commit_message_parallel(&client, &model_str, &patch, max_length).await {
           Ok(message) => return Ok(openai::Response { response: message }),
           Err(e) => {
             // Check if it's an API key error
             if e.to_string().contains("invalid_api_key") || e.to_string().contains("Incorrect API key") {
               bail!("Invalid OpenAI API key. Please check your API key configuration.");
             }
-            log::warn!("Multi-step generation failed: {e}");
-            if let Some(session) = debug_output::debug_session() {
-              session.set_multi_step_error(e.to_string());
+            log::warn!("Parallel generation failed, trying multi-step: {e}");
+            
+            // Fallback to old multi-step approach
+            match generate_commit_message_multi_step(&client, &model_str, &patch, max_length).await {
+              Ok(message) => return Ok(openai::Response { response: message }),
+              Err(e2) => {
+                log::warn!("Multi-step generation also failed: {e2}");
+                if let Some(session) = debug_output::debug_session() {
+                  session.set_multi_step_error(e2.to_string());
+                }
+              }
             }
           }
         }
diff --git a/src/multi_step_integration.rs b/src/multi_step_integration.rs
@@ -591,6 +591,174 @@ async fn select_best_candidate(
   }
 }
 
+/// Generates a commit message by analyzing every changed file concurrently.
+///
+/// The diff is split into per-file entries with `parse_diff`. Each entry is summarized by
+/// `analyze_single_file_simple`, and all of those requests are awaited together through
+/// `join_all`. The summaries that succeed are then merged into a single message by
+/// `synthesize_commit_message`, using `max_length` (72 when `None`) as the length limit.
+///
+/// # Errors
+///
+/// Fails when the diff cannot be parsed or yields no files, when a per-file analysis
+/// reports an API key problem (that error is returned as-is), when no file could be
+/// analyzed at all, or when the synthesis step fails. Any other per-file failure is
+/// logged and that file is left out of the synthesis input.
+pub async fn generate_commit_message_parallel(
+  client: &Client<OpenAIConfig>, model: &str, diff_content: &str, max_length: Option<usize>
+) -> Result<String> {
+  log::info!("Starting parallel commit message generation");
+
+  let parsed_files = parse_diff(diff_content)?;
+  log::info!("Parsed {} files from diff", parsed_files.len());
+
+  if parsed_files.is_empty() {
+    anyhow::bail!("No files found in diff");
+  }
+
+  // Phase 1: one analysis future per file, all awaited together.
+  log::debug!("Starting parallel analysis of {} files", parsed_files.len());
+
+  let pending = parsed_files.iter().map(|file| {
+    let path = file.path.clone();
+    let op = file.operation.clone();
+    let patch = file.diff_content.clone();
+    async move { analyze_single_file_simple(client, model, &path, &op, &patch).await }
+  });
+  let outcomes = join_all(pending).await;
+
+  // `join_all` yields results in input order, so each outcome pairs with its file.
+  let mut successful_analyses = Vec::new();
+  for (i, (file, outcome)) in parsed_files.iter().zip(outcomes).enumerate() {
+    let err = match outcome {
+      Ok(summary) => {
+        log::debug!("Successfully analyzed file {}: {}", i, file.path);
+        successful_analyses.push((file.path.clone(), summary));
+        continue;
+      }
+      Err(err) => err
+    };
+
+    // An API key problem will not fix itself for the remaining files: surface it now.
+    let text = err.to_string();
+    let is_key_error = ["invalid_api_key", "Incorrect API key", "Invalid API key"]
+      .iter()
+      .any(|needle| text.contains(*needle));
+    if is_key_error {
+      return Err(err);
+    }
+
+    // Anything else only costs us this one file's summary.
+    log::warn!("Failed to analyze file {}: {}", file.path, err);
+  }
+
+  if successful_analyses.is_empty() {
+    anyhow::bail!("Failed to analyze any files in parallel");
+  }
+
+  // Phase 2: fold the per-file summaries into the final message.
+  log::debug!("Synthesizing final commit message from {} analyses", successful_analyses.len());
+
+  let limit = max_length.unwrap_or(72);
+  synthesize_commit_message(client, model, &successful_analyses, limit).await
+}
+
+/// Asks the model for a short plain-text summary of one file's change (no function calling).
+///
+/// Sends a fixed system prompt plus a user prompt built from `file_path`, `operation` and
+/// `diff_content`, limits the completion to 150 tokens, and returns the trimmed reply text.
+///
+/// # Errors
+///
+/// Returns an error if the request cannot be built, the API call fails, or the response
+/// carries no choices or no text content.
+async fn analyze_single_file_simple(
+  client: &Client<OpenAIConfig>,
+  model: &str,
+  file_path: &str,
+  operation: &str,
+  diff_content: &str,
+) -> Result<String> {
+  let system_prompt = "You are a git diff analyzer. Analyze the provided file change and provide a concise summary in 1-2 sentences describing what changed and why it matters.";
+
+  let user_prompt = format!(
+    "File: {}\nOperation: {}\nDiff:\n{}\n\nProvide a concise summary (1-2 sentences) of what changed and why it matters:",
+    file_path, operation, diff_content
+  );
+
+  let request = CreateChatCompletionRequestArgs::default()
+    .model(model)
+    .messages(vec![
+      ChatCompletionRequestSystemMessageArgs::default()
+        .content(system_prompt)
+        .build()?
+        .into(),
+      ChatCompletionRequestUserMessageArgs::default()
+        .content(user_prompt)
+        .build()?
+        .into(),
+    ])
+    .max_tokens(150u32) // Keep responses concise
+    .build()?;
+
+  let response = client.chat().create(request).await?;
+
+  // Use `first()` rather than `choices[0]`: an empty `choices` list must become an
+  // ordinary error the caller can log and skip, not a panic.
+  let content = response
+    .choices
+    .first()
+    .and_then(|choice| choice.message.content.as_deref())
+    .ok_or_else(|| anyhow::anyhow!("No content in response"))?;
+
+  Ok(content.trim().to_string())
+}
+
+/// Synthesizes a final commit message from multiple file analyses.
+///
+/// `analyses` holds `(file_path, summary)` pairs. They are rendered as a bullet list and
+/// sent to the model together with a request for a message of at most `max_length`
+/// characters (completion capped at 100 tokens). The trimmed reply is returned; if it is
+/// longer than `max_length` characters it is cut down and, when there is room, ends in
+/// "...".
+///
+/// # Errors
+///
+/// Returns an error if the request cannot be built, the API call fails, or the response
+/// carries no choices or no text content.
+async fn synthesize_commit_message(
+  client: &Client<OpenAIConfig>,
+  model: &str,
+  analyses: &[(String, String)],
+  max_length: usize,
+) -> Result<String> {
+  // Build context from all analyses
+  let mut context = String::from("File changes summary:\n");
+  for (file_path, summary) in analyses {
+    context.push_str(&format!("• {}: {}\n", file_path, summary));
+  }
+
+  let system_prompt = format!(
+    "You are a git commit message expert. Based on the file change summaries provided, generate a concise, descriptive commit message that captures the essential nature of the changes. The message should be {} characters or less and follow conventional commit format when appropriate. Focus on WHAT changed and WHY, not just listing files.",
+    max_length
+  );
+
+  let user_prompt = format!(
+    "{}\n\nGenerate a commit message (max {} characters) that captures the essential nature of these changes:",
+    context, max_length
+  );
+
+  let request = CreateChatCompletionRequestArgs::default()
+    .model(model)
+    .messages(vec![
+      ChatCompletionRequestSystemMessageArgs::default()
+        .content(system_prompt)
+        .build()?
+        .into(),
+      ChatCompletionRequestUserMessageArgs::default()
+        .content(user_prompt)
+        .build()?
+        .into(),
+    ])
+    .max_tokens(100u32) // Commit messages should be short
+    .build()?;
+
+  let response = client.chat().create(request).await?;
+
+  // Use `first()` rather than `choices[0]` so an empty `choices` list is an error,
+  // not a panic.
+  let content = response
+    .choices
+    .first()
+    .and_then(|choice| choice.message.content.as_deref())
+    .ok_or_else(|| anyhow::anyhow!("No content in response"))?;
+
+  let message = content.trim();
+
+  // The limit is expressed in characters, so measure in characters too. Comparing the
+  // byte length against a char-based cut mishandles non-ASCII text.
+  if message.chars().count() <= max_length {
+    return Ok(message.to_string());
+  }
+
+  // Reserve three characters for the ellipsis. When the limit leaves no room for any
+  // text before it (max_length <= 3), fall back to a plain cut instead of underflowing.
+  let truncated = match max_length.checked_sub(3) {
+    Some(keep) if keep > 0 => message.chars().take(keep).collect::<String>() + "...",
+    _ => message.chars().take(max_length).collect()
+  };
+
+  Ok(truncated)
+}
+
 /// Alternative: Use the multi-step analysis locally without OpenAI calls
 pub fn generate_commit_message_local(diff_content: &str, max_length: Option<usize>) -> Result<String> {
   use crate::multi_step_analysis::{analyze_file, calculate_impact_scores, generate_commit_messages};
@@ -807,4 +975,66 @@ index 1234567..abcdefg 100644
     assert!(!message.is_empty());
     assert!(message.len() <= 72);
   }
+
+  /// Checks the parsing step that the parallel generator relies on: a two-file diff
+  /// must yield two entries, in order, each carrying its own hunk content.
+  ///
+  /// Nothing here is awaited, so this is a plain synchronous test rather than a
+  /// `#[tokio::test]`.
+  #[test]
+  fn test_parallel_generation_parsing() {
+    // Test that the parallel approach correctly handles multi-file diffs
+    let diff = r#"diff --git a/src/auth.rs b/src/auth.rs
+index 1234567..abcdefg 100644
+--- a/src/auth.rs
++++ b/src/auth.rs
+@@ -1,3 +1,4 @@
++use crate::security;
+ pub fn authenticate() {
+     // authentication logic
+ }
+diff --git a/src/main.rs b/src/main.rs
+index abcd123..efgh456 100644
+--- a/src/main.rs
++++ b/src/main.rs
+@@ -1,2 +1,3 @@
+ fn main() {
+     println!("Hello");
++    auth::authenticate();
+ }"#;
+
+    // Parse files to ensure parsing works correctly for parallel processing
+    let files = parse_diff(diff).unwrap();
+    assert_eq!(files.len(), 2);
+    assert_eq!(files[0].path, "src/auth.rs");
+    assert_eq!(files[1].path, "src/main.rs");
+
+    // Verify diff content is captured
+    assert!(files[0].diff_content.contains("use crate::security"));
+    assert!(files[1].diff_content.contains("auth::authenticate"));
+  }
+  
+  /// Exercises `parse_diff` on two less common inputs: a deleted file whose new side
+  /// is `/dev/null`, and a newly added binary file.
+  #[test]
+  fn test_parse_diff_edge_cases() {
+    // Case 1: deletion, where the new side of the diff is /dev/null.
+    let deletion_patch = r#"diff --git a/old_file.txt b/dev/null
+deleted file mode 100644
+index 1234567..0000000
+--- a/old_file.txt
++++ /dev/null
+@@ -1,2 +0,0 @@
+-Old content
+-To be removed"#;
+
+    let deleted = parse_diff(deletion_patch).unwrap();
+    assert_eq!(deleted.len(), 1);
+    let removed = &deleted[0];
+    assert_eq!(removed.path, "old_file.txt", "Should extract original path for deleted files");
+    assert_eq!(removed.operation, "deleted");
+
+    // Case 2: binary file, which has no textual hunk, only a "Binary files" marker.
+    let binary_patch = r#"diff --git a/image.png b/image.png
+new file mode 100644
+index 0000000..1234567
+Binary files /dev/null and b/image.png differ"#;
+
+    let binaries = parse_diff(binary_patch).unwrap();
+    assert_eq!(binaries.len(), 1);
+    let image = &binaries[0];
+    assert_eq!(image.path, "image.png");
+    assert_eq!(image.operation, "binary");
+  }
 }
diff --git a/src/openai.rs b/src/openai.rs
@@ -11,7 +11,7 @@ use futures::future::join_all;
 use crate::{commit, config, debug_output, function_calling, profile};
 use crate::model::Model;
 use crate::config::AppConfig;
-use crate::multi_step_integration::generate_commit_message_multi_step;
+use crate::multi_step_integration::{generate_commit_message_multi_step, generate_commit_message_parallel};
 
 const MAX_ATTEMPTS: usize = 3;
 
@@ -205,14 +205,23 @@ pub async fn call_with_config(request: Request, config: OpenAIConfig) -> Result<
   let client = Client::with_config(config.clone());
   let model = request.model.to_string();
 
-  match generate_commit_message_multi_step(&client, &model, &request.prompt, config::APP_CONFIG.max_commit_length).await {
+  // Try parallel approach first
+  match generate_commit_message_parallel(&client, &model, &request.prompt, config::APP_CONFIG.max_commit_length).await {
     Ok(message) => return Ok(Response { response: message }),
     Err(e) => {
       // Check if it's an API key error and propagate it
       if e.to_string().contains("invalid_api_key") || e.to_string().contains("Incorrect API key") {
         return Err(e);
       }
-      log::warn!("Multi-step approach failed, falling back to single-step: {e}");
+      log::warn!("Parallel approach failed, trying multi-step: {e}");
+      
+      // Fallback to old multi-step approach
+      match generate_commit_message_multi_step(&client, &model, &request.prompt, config::APP_CONFIG.max_commit_length).await {
+        Ok(message) => return Ok(Response { response: message }),
+        Err(e2) => {
+          log::warn!("Multi-step approach also failed, falling back to single-step: {e2}");
+        }
+      }
     }
   }