diff --git a/src/authorship/stats.rs b/src/authorship/stats.rs index 02cfbd4f..8112bb93 100644 --- a/src/authorship/stats.rs +++ b/src/authorship/stats.rs @@ -1,6 +1,6 @@ use crate::authorship::transcript::Message; use crate::error::GitAiError; -use crate::git::refs::get_authorship; +use crate::git::refs::{get_authorship, grep_ai_notes_paths, read_note_from_path}; use crate::git::repository::Repository; use crate::{authorship::authorship_log::LineRange, utils::debug_log}; use serde::{Deserialize, Serialize}; @@ -565,16 +565,73 @@ pub fn stats_for_commit_stats( let (git_diff_added_lines, git_diff_deleted_lines) = get_git_diff_stats(repo, commit_sha)?; // Step 2: get the authorship log for this commit - let authorship_log = get_authorship(repo, &commit_sha); + let authorship_log = get_authorship(repo, commit_sha); - // Step 3: Calculate stats from authorship log + // Step 3: Resolve foreign prompts - prompts that are referenced in attestations but stored in other commits + let resolved_log = authorship_log.map(|mut log| { + resolve_foreign_prompts(repo, &mut log); + log + }); + + // Step 4: Calculate stats from authorship log Ok(stats_from_authorship_log( - authorship_log.as_ref(), + resolved_log.as_ref(), git_diff_added_lines, git_diff_deleted_lines, )) } +/// Resolve foreign prompts - prompts referenced in attestations but stored in other commits' notes. +/// This happens when authorship data is merged/squashed across branches without copying the full prompt records. +fn resolve_foreign_prompts( + repo: &Repository, + log: &mut crate::authorship::authorship_log_serialization::AuthorshipLog, +) { + use std::collections::HashSet; + + // Collect all hashes referenced in attestations that don't have a local prompt + let mut missing_hashes: HashSet = HashSet::new(); + for file_attestation in &log.attestations { + for entry in &file_attestation.entries { + if !log.metadata.prompts.contains_key(&entry.hash) { + missing_hashes.insert(entry.hash.clone()); + } + } + } + + if missing_hashes.is_empty() { + return; + } + + debug_log(&format!( + "Resolving {} foreign prompts for stats calculation", + missing_hashes.len() + )); + + // Look up each missing prompt from other commits' notes + for hash in missing_hashes { + // Use git grep to find note paths that contain this prompt hash + // This returns note blob paths even for deleted commits + if let Ok(note_paths) = + grep_ai_notes_paths(repo, &format!("\"{}\"", &hash)) + { + // Try each note path until we find the prompt + for note_path in note_paths { + if let Some(other_log) = read_note_from_path(repo, ¬e_path) { + if let Some(prompt_record) = other_log.metadata.prompts.get(&hash) { + debug_log(&format!( + "Found foreign prompt {} in note path {}", + hash, note_path + )); + log.metadata.prompts.insert(hash.clone(), prompt_record.clone()); + break; + } + } + } + } + } +} + /// Get git diff statistics between commit and its parent pub fn get_git_diff_stats(repo: &Repository, commit_sha: &str) -> Result<(u32, u32), GitAiError> { // Use git show --numstat to get diff statistics @@ -979,4 +1036,123 @@ mod tests { "Git diff shows 0 deleted lines" ); } + + #[test] + fn test_stats_resolves_foreign_prompts_via_note_path() { + // This test verifies that stats correctly resolves "foreign prompts" - + // prompts that are referenced in attestations but stored in other commits' notes. + // + // This bug occurs when: + // 1. A commit has attestations pointing to a prompt hash (e.g., "10d7219") + // 2. The prompts section is empty in that commit's note + // 3. The prompt exists in another commit's note (possibly a deleted commit) + // + // The fix uses grep_ai_notes_paths() and read_note_from_path() to find + // prompts even when the original commit has been deleted. + + use crate::authorship::authorship_log_serialization::{ + AttestationEntry, AuthorshipLog, AuthorshipMetadata, FileAttestation, + AUTHORSHIP_LOG_VERSION, + }; + use crate::authorship::authorship_log::LineRange; + use crate::git::refs::notes_add; + + let tmp_repo = TmpRepo::new().unwrap(); + + // Step 1: Create initial commit with a file + let mut file = tmp_repo.write_file("test.txt", "Line1\n", true).unwrap(); + tmp_repo + .trigger_checkpoint_with_author("test_user") + .unwrap(); + tmp_repo.commit_with_message("Initial commit").unwrap(); + + // Step 2: Create a commit with AI authorship - this generates a note with both + // attestations and prompts + file.append("AI Line 2\nAI Line 3\n").unwrap(); + tmp_repo + .trigger_checkpoint_with_ai("Claude", Some("claude-3-sonnet"), Some("cursor")) + .unwrap(); + tmp_repo.commit_with_message("AI adds lines").unwrap(); + + let ai_commit_sha = tmp_repo.get_head_commit_sha().unwrap(); + + // Get the authorship log from the AI commit to extract the prompt hash + let ai_commit_log = get_authorship(&tmp_repo.gitai_repo(), &ai_commit_sha).unwrap(); + + // Verify the AI commit has prompts + assert!( + !ai_commit_log.metadata.prompts.is_empty(), + "AI commit should have prompts in its note" + ); + + // Extract the prompt hash from the AI commit + let prompt_hash = ai_commit_log + .metadata + .prompts + .keys() + .next() + .unwrap() + .clone(); + + // Step 3: Create a new commit with human changes + file.append("Human Line 4\nHuman Line 5\n").unwrap(); + tmp_repo + .trigger_checkpoint_with_author("test_user") + .unwrap(); + tmp_repo + .commit_with_message("Human adds lines after AI") + .unwrap(); + + let new_commit_sha = tmp_repo.get_head_commit_sha().unwrap(); + + // Step 4: Manually create a note for the new commit that has attestations + // pointing to the AI prompt hash, but with EMPTY prompts. + // This simulates the bug where prompts are stored in a different commit's note. + let mut orphaned_note = AuthorshipLog::new(); + orphaned_note.metadata = AuthorshipMetadata { + schema_version: AUTHORSHIP_LOG_VERSION.to_string(), + git_ai_version: Some("test".to_string()), + base_commit_sha: new_commit_sha.clone(), + prompts: std::collections::BTreeMap::new(), // Empty prompts - this is the bug! + }; + + // Add attestation referencing the foreign prompt hash + let mut file_attestation = FileAttestation::new("test.txt".to_string()); + file_attestation.add_entry(AttestationEntry::new( + prompt_hash.clone(), + vec![LineRange::Single(2), LineRange::Single(3)], // Lines 2-3 are AI + )); + orphaned_note.attestations.push(file_attestation); + + // Write this crafted note (with empty prompts) to the new commit + let note_content = orphaned_note.serialize_to_string().unwrap(); + notes_add(&tmp_repo.gitai_repo(), &new_commit_sha, ¬e_content).unwrap(); + + // Verify the note was written with empty prompts + let written_log = get_authorship(&tmp_repo.gitai_repo(), &new_commit_sha).unwrap(); + assert!( + written_log.metadata.prompts.is_empty(), + "The crafted note should have empty prompts (simulating the bug)" + ); + assert!( + !written_log.attestations.is_empty(), + "The crafted note should have attestations referencing the foreign prompt" + ); + + // Step 5: Verify that stats correctly resolves the foreign prompt + // by searching note paths (not commit SHAs which may not exist). + // + // Without the fix: ai_accepted = 0, ai_additions = 0 + // With the fix: finds the prompt via grep_ai_notes_paths + read_note_from_path + let stats_result = stats_for_commit_stats(&tmp_repo.gitai_repo(), &new_commit_sha).unwrap(); + + assert_eq!( + stats_result.ai_accepted, 2, + "Stats should resolve foreign prompt and show 2 AI-accepted lines" + ); + assert!( + stats_result.ai_additions > 0, + "Stats should show AI additions after resolving foreign prompt" + ); + } } diff --git a/src/git/refs.rs b/src/git/refs.rs index 71fe425d..7926394d 100644 --- a/src/git/refs.rs +++ b/src/git/refs.rs @@ -180,49 +180,6 @@ pub fn get_reference_as_authorship_log_v3( Ok(authorship_log) } -#[cfg(test)] -mod tests { - use super::*; - use crate::git::test_utils::TmpRepo; - - #[test] - fn test_notes_add_and_show_authorship_note() { - // Create a temporary repository - let tmp_repo = TmpRepo::new().expect("Failed to create tmp repo"); - - // Create a commit first - tmp_repo - .commit_with_message("Initial commit") - .expect("Failed to create initial commit"); - - // Get the commit SHA - let commit_sha = tmp_repo - .get_head_commit_sha() - .expect("Failed to get head commit SHA"); - - // Test data - simple string content - let note_content = "This is a test authorship note with some random content!"; - - // Add the authorship note (force overwrite since commit_with_message already created one) - notes_add(tmp_repo.gitai_repo(), &commit_sha, note_content) - .expect("Failed to add authorship note"); - - // Read the note back - let retrieved_content = show_authorship_note(tmp_repo.gitai_repo(), &commit_sha) - .expect("Failed to retrieve authorship note"); - - // Assert the content matches exactly - assert_eq!(retrieved_content, note_content); - - // Test that non-existent commit returns None - let non_existent_content = show_authorship_note( - tmp_repo.gitai_repo(), - "0000000000000000000000000000000000000000", - ); - assert!(non_existent_content.is_none()); - } -} - /// Sanitize a remote name to create a safe ref name /// Replaces special characters with underscores to ensure valid ref names fn sanitize_remote_name(remote: &str) -> String { @@ -343,3 +300,235 @@ pub fn grep_ai_notes(repo: &Repository, pattern: &str) -> Result, Gi Ok(shas.into_iter().collect()) } } + +/// Search AI notes for a pattern and return matching note paths (not commit SHAs). +/// This works even for notes attached to deleted commits. +/// (hopefully no scm platform deletes hanging notes) +pub fn grep_ai_notes_paths(repo: &Repository, pattern: &str) -> Result, GitAiError> { + let mut args = repo.global_args_for_exec(); + args.push("--no-pager".to_string()); + args.push("grep".to_string()); + args.push("-l".to_string()); // Only show file paths + args.push(pattern.to_string()); + args.push("refs/notes/ai".to_string()); + + let output = exec_git(&args)?; + let stdout = String::from_utf8(output.stdout) + .map_err(|_| GitAiError::Generic("Failed to parse git grep output".to_string()))?; + + // Parse output format: refs/notes/ai:ab/cdef123... + // Extract the path portion (ab/cdef123...) + let mut paths: HashSet = HashSet::new(); + for line in stdout.lines() { + if let Some(path) = line.strip_prefix("refs/notes/ai:") { + paths.insert(path.to_string()); + } + } + + Ok(paths.into_iter().collect()) +} + +/// Read a note directly from its path in the notes tree. +/// This works even when the commit the note is attached to has been deleted. +/// Accepts paths in either format: "ab/cdef123..." or "abcdef123..." +/// Git uses flat paths for small repos and tree paths (with slash) for large repos. +pub fn read_note_from_path(repo: &Repository, note_path: &str) -> Option { + let original = note_path.to_string(); + let flat = note_path.replace('/', ""); + let tree = if flat.len() >= 2 { + format!("{}/{}", &flat[..2], &flat[2..]) + } else { + return None; + }; + + // Build deduped list: original first, then flat, then tree + let mut paths = Vec::with_capacity(3); + for path in [original, flat, tree] { + if !paths.contains(&path) { + paths.push(path); + } + } + + for path in paths { + let mut args = repo.global_args_for_exec(); + args.push("show".to_string()); + args.push(format!("refs/notes/ai:{}", path)); + + if let Ok(output) = exec_git(&args) { + if let Ok(content) = String::from_utf8(output.stdout) { + if let Ok(log) = AuthorshipLog::deserialize_from_string(&content) { + return Some(log); + } + } + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::git::test_utils::TmpRepo; + + #[test] + fn test_notes_add_and_show_authorship_note() { + // Create a temporary repository + let tmp_repo = TmpRepo::new().expect("Failed to create tmp repo"); + + // Create a commit first + tmp_repo + .commit_with_message("Initial commit") + .expect("Failed to create initial commit"); + + // Get the commit SHA + let commit_sha = tmp_repo + .get_head_commit_sha() + .expect("Failed to get head commit SHA"); + + // Test data - simple string content + let note_content = "This is a test authorship note with some random content!"; + + // Add the authorship note (force overwrite since commit_with_message already created one) + notes_add(tmp_repo.gitai_repo(), &commit_sha, note_content) + .expect("Failed to add authorship note"); + + // Read the note back + let retrieved_content = show_authorship_note(tmp_repo.gitai_repo(), &commit_sha) + .expect("Failed to retrieve authorship note"); + + // Assert the content matches exactly + assert_eq!(retrieved_content, note_content); + + // Test that non-existent commit returns None + let non_existent_content = show_authorship_note( + tmp_repo.gitai_repo(), + "0000000000000000000000000000000000000000", + ); + assert!(non_existent_content.is_none()); + } + + #[test] + fn test_read_note_from_path_both_formats() { + // Test that read_note_from_path can read notes using both path formats: + // - Flat path: "abcdef123..." (used by git for small repos) + // - Tree path: "ab/cdef123..." (used by git for large repos) + // + // The function should handle both formats transparently. + + let tmp_repo = TmpRepo::new().unwrap(); + + // Create a commit with AI authorship + let mut file = tmp_repo.write_file("test.txt", "Line1\n", true).unwrap(); + tmp_repo + .trigger_checkpoint_with_author("test_user") + .unwrap(); + tmp_repo.commit_with_message("Initial commit").unwrap(); + + file.append("AI Line 2\n").unwrap(); + tmp_repo + .trigger_checkpoint_with_ai("TestAI", Some("test-model"), Some("test-tool")) + .unwrap(); + tmp_repo.commit_with_message("AI commit").unwrap(); + + let commit_sha = tmp_repo.get_head_commit_sha().unwrap(); + + // Generate both path formats from the commit SHA + let flat_path = commit_sha.clone(); + let tree_path = format!("{}/{}", &commit_sha[..2], &commit_sha[2..]); + + // Test flat path format (abcdef123...) + // This should work in small repos where git stores notes flat + let note_flat = read_note_from_path(&tmp_repo.gitai_repo(), &flat_path); + assert!( + note_flat.is_some(), + "Should be able to read note from flat path {}", + flat_path + ); + assert!( + !note_flat.unwrap().metadata.prompts.is_empty(), + "Note from flat path should have prompts" + ); + + // Test tree path format (ab/cdef123...) + // The function should normalize this to flat format if needed + let note_tree = read_note_from_path(&tmp_repo.gitai_repo(), &tree_path); + assert!( + note_tree.is_some(), + "Should be able to read note from tree path {} (normalized to flat)", + tree_path + ); + assert!( + !note_tree.unwrap().metadata.prompts.is_empty(), + "Note from tree path should have prompts" + ); + } + + #[test] + fn test_grep_ai_notes_paths_finds_unreachable_commits() { + // Test that grep_ai_notes_paths can find notes even when the commit + // they're attached to is no longer reachable from any branch + // (e.g., after a reset or rebase). The commit object may still exist + // in the object store but is not reachable via any ref. + use crate::git::repository::exec_git; + + let tmp_repo = TmpRepo::new().unwrap(); + + // Create initial commit + let mut file = tmp_repo.write_file("test.txt", "Line1\n", true).unwrap(); + tmp_repo + .trigger_checkpoint_with_author("test_user") + .unwrap(); + tmp_repo.commit_with_message("Initial commit").unwrap(); + let initial_sha = tmp_repo.get_head_commit_sha().unwrap(); + + // Create a commit with AI authorship + file.append("AI Line 2\n").unwrap(); + tmp_repo + .trigger_checkpoint_with_ai("TestAI", Some("test-model"), Some("test-tool")) + .unwrap(); + tmp_repo.commit_with_message("AI commit").unwrap(); + + let ai_commit_sha = tmp_repo.get_head_commit_sha().unwrap(); + + // Get the prompt hash before we make the commit unreachable + let log = get_authorship(&tmp_repo.gitai_repo(), &ai_commit_sha).unwrap(); + let prompt_hash = log.metadata.prompts.keys().next().unwrap().clone(); + + // Make the AI commit unreachable by resetting to the initial commit + // The note will still exist in refs/notes/ai even though the commit is unreachable + let mut args = tmp_repo.gitai_repo().global_args_for_exec(); + args.extend(["reset", "--hard", &initial_sha].map(String::from)); + exec_git(&args).expect("Failed to reset"); + + // Verify the commit is no longer reachable from HEAD + let mut args = tmp_repo.gitai_repo().global_args_for_exec(); + args.extend(["branch", "--contains", &ai_commit_sha].map(String::from)); + let result = exec_git(&args).expect("branch --contains failed"); + let branches = String::from_utf8_lossy(&result.stdout); + assert!( + branches.trim().is_empty(), + "The AI commit should not be reachable from any branch after reset" + ); + + // grep_ai_notes_paths should still find the note + let paths = + grep_ai_notes_paths(&tmp_repo.gitai_repo(), &format!("\"{}\"", prompt_hash)).unwrap(); + + assert!( + !paths.is_empty(), + "grep_ai_notes_paths should find notes even for unreachable commits" + ); + + // And we should be able to read the note content + let note = read_note_from_path(&tmp_repo.gitai_repo(), &paths[0]); + assert!( + note.is_some(), + "Should be able to read note from unreachable commit via path" + ); + assert!( + note.unwrap().metadata.prompts.contains_key(&prompt_hash), + "Note should contain the expected prompt" + ); + } +}