@@ -77,56 +77,61 @@ function cleanWorkflowName(name: string): string {
/**
 * Finds scheduled workflows whose most recent run failed or was cancelled.
 *
 * Lists the 50 most recent runs via `gh run list`, keeps only those triggered
 * by the `schedule` event, and inspects the newest run of each workflow
 * (newest = greatest `createdAt`; on a tie the run listed first wins).
 *
 * @returns One entry per workflow whose newest scheduled run concluded with
 *   `failure` or `cancelled`, in order of each workflow's first appearance in
 *   the run list. Returns an empty array when there are no such workflows, or
 *   when the run list could not be fetched or parsed (the error is logged,
 *   never thrown).
 */
async function getScheduledFailures(): Promise<FailureInfo[]> {
  log("Checking for scheduled workflow failures...");

  try {
    // Get recent runs of every kind; filtering by event happens below.
    const runsJson = execCommand(
      'gh run list --limit 50 --json status,conclusion,workflowName,createdAt,headSha,url,event,databaseId',
      { silent: true }
    );

    // Validate the shape before trusting it: an error payload or empty output
    // would otherwise surface as an opaque "filter is not a function".
    const parsed: unknown = JSON.parse(runsJson);
    if (!Array.isArray(parsed)) {
      throw new Error('unexpected output from gh run list (expected a JSON array)');
    }
    const allRuns = parsed as WorkflowRun[];
    const scheduledRuns = allRuns.filter(run => run.event === 'schedule');

    log(`DEBUG: Found ${scheduledRuns.length} scheduled runs`);

    // Track only the newest run per workflow. A Map keeps first-insertion
    // order, so workflows are reported in the order they first appear.
    const latestByWorkflow = new Map<string, WorkflowRun>();
    for (const run of scheduledRuns) {
      const current = latestByWorkflow.get(run.workflowName);
      const isNewer =
        current === undefined ||
        new Date(run.createdAt).getTime() > new Date(current.createdAt).getTime();
      if (isNewer) {
        latestByWorkflow.set(run.workflowName, run);
      }
    }

    const failures: FailureInfo[] = [];

    for (const [workflowName, mostRecent] of latestByWorkflow) {
      log(`DEBUG: Processing workflow: ${workflowName}`);
      log(`DEBUG: Latest run has conclusion: ${mostRecent.conclusion}`);

      // NOTE(review): only the newest run is inspected, so a run that has not
      // concluded yet presumably hides an earlier failed run of the same
      // workflow — confirm this is intended.
      if (mostRecent.conclusion === 'failure' || mostRecent.conclusion === 'cancelled') {
        log(`DEBUG: Adding ${workflowName} to failures list`);
        failures.push({
          runId: mostRecent.databaseId,
          workflowName: mostRecent.workflowName,
          headSha: mostRecent.headSha,
          url: mostRecent.url
        });
      }
    }

    log(`DEBUG: Final failures list has ${failures.length} entries`);

    if (failures.length === 0) {
      log("No unresolved scheduled workflow failures found");
      return [];
    }

    log(`Found ${failures.length} unresolved scheduled workflow failures`);
    return failures;
  } catch (error: unknown) {
    // Anything can be thrown (not just Error instances); narrow before
    // reading `.message` so the log never says "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    log(`ERROR: Failed to get scheduled failures: ${reason}`);
    return [];
  }
}
131136
132137// Setup worktree for a specific failure
@@ -139,7 +144,15 @@ async function setupWorktreeForFailure(runId: number, workflowName: string): Pro
139144 try {
140145 // Create worktree using git worktree command directly (force override if exists)
141146 const worktreePath = join ( '..' , worktreeName ) ;
142- execCommand ( `git worktree add -f "${ worktreePath } "` , { silent : true } ) ;
147+
148+ // Remove existing worktree if it exists
149+ try {
150+ execCommand ( `git worktree remove -f "${ worktreePath } "` , { silent : true } ) ;
151+ } catch ( e ) {
152+ // Ignore errors if worktree doesn't exist
153+ }
154+
155+ execCommand ( `git worktree add "${ worktreePath } "` , { silent : true } ) ;
143156
144157 // Create reports directory in worktree
145158 mkdirSync ( join ( worktreePath , 'cron' , 'reports' ) , { recursive : true } ) ;
@@ -317,102 +330,109 @@ async function invokeClaudeAnalysis(workflowName: string, worktreePath: string):
317330 const cleanWorkflow = cleanWorkflowName ( workflowName ) ;
318331 log ( `Invoking Claude analysis for ${ workflowName } in ${ worktreePath } ` ) ;
319332
320- const claudePrompt = `# Automated CI Failure Analysis
333+ const claudePrompt = `# CI Failure Analysis Task
334+
335+ You are in a dedicated worktree for analyzing a GitHub Actions workflow failure.
336+
337+ ## Your Task
338+
339+ 1. **Analyze the failure data** in the cron/reports/ directory
340+ 2. **Identify the root cause** of the ${ workflowName } workflow failure
341+ 3. **Create a detailed assessment** report
342+ 4. **Suggest concrete fixes** if the confidence level is high
343+ 5. **Write your findings** to a file called "analysis-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .md"
344+
345+ ## Available Data
346+
347+ The cron/reports/ directory contains:
348+ - failure-logs-*.txt: Full workflow failure logs
349+ - failure-details-*.json: Structured failure information
350+ - commit-range-*.txt: Commits between last success and failure
351+ - detailed-commits-*.txt: Full commit messages and details
352+ - diff-stat-*.txt: Files changed summary
353+ - full-diff-*.txt: Complete code changes
354+ - pr-info-*.json: Related pull request information
355+ - metadata-*.json: Run metadata
321356
322- You are analyzing a CI failure in a dedicated worktree. The failure data has been pre-collected in ./cron/reports/.
357+ ## Analysis Framework
323358
324- Your task is to:
325- 1. **Analyze the failure** using the pre-collected data:
326- - Read failure-logs-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .txt for error details
327- - Read failure-details-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .json for run metadata
328- - Read commit-range-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .txt for basic commit list
329- - Read detailed-commits-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .txt for full commit messages
330- - Read diff-stat-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .txt for file change summary
331- - Read diff-name-status-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .txt for specific file changes
332- - Read full-diff-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .txt for complete code changes
333- - Read pr-info-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .json for PR context
334- - Read last-good-run-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .txt for baseline reference
359+ Please structure your analysis as follows:
335360
336- 2. **Perform root cause analysis** and assess fix confidence (0-100%):
337- - **High confidence (80-100%)**: Simple dependency updates, lint fixes, obvious typos
338- - **Medium confidence (60-79%)**: Test failures with clear fixes, build config issues
339- - **Low confidence (0-59%)**: Complex logic errors, environmental issues
361+ ### 1. Executive Summary
362+ - Brief description of the failure
363+ - Impact assessment
364+ - Confidence level in diagnosis (High/Medium/Low)
340365
341- 3. **Create analysis report** in ./cron/reports/:
342- - **Always create**: assessment- ${ cleanWorkflow } - ${ CONFIG . DATE_STAMP } .md
343- - **If attempting fix**: resolution- ${ cleanWorkflow } - ${ CONFIG . DATE_STAMP } .md
344- - **If deferring**: status- ${ cleanWorkflow } - ${ CONFIG . DATE_STAMP } .md
366+ ### 2. Root Cause Analysis
367+ - Primary cause of failure
368+ - Contributing factors
369+ - Timeline of events
345370
346- 4. **If confidence >75%**: Implement fix, test, and commit with clear message
371+ ### 3. Code Analysis
372+ - Specific changes that triggered the failure
373+ - Code quality issues identified
374+ - Test coverage gaps
347375
348- **Important**: Work only with the pre-collected data. Focus on analysis and solution, not data gathering.
376+ ### 4. Recommendations
377+ - Immediate fixes needed
378+ - Long-term improvements
379+ - Prevention strategies
349380
350- Begin analysis now.` ;
381+ ### 5. Implementation Plan
382+ - Step-by-step fix instructions
383+ - Testing recommendations
384+ - Risk assessment
385+
386+ Focus on actionable insights that will help prevent similar failures.` ;
351387
352388 try {
389+ // Test write permissions in worktree
390+ try {
391+ writeFileSync ( join ( worktreePath , 'write-test.tmp' ) , 'test' ) ;
392+ execCommand ( `rm -f "${ join ( worktreePath , 'write-test.tmp' ) } "` , { silent : true } ) ;
393+ log ( `DEBUG: Write permissions confirmed in ${ worktreePath } ` ) ;
394+ } catch ( permError ) {
395+ log ( `WARNING: Write permission issues in ${ worktreePath } : ${ permError } ` ) ;
396+ }
397+
353398 // Write prompt to file for debugging
354399 writeFileSync ( join ( worktreePath , 'claude-prompt.txt' ) , claudePrompt ) ;
355400 log ( `DEBUG: Wrote Claude prompt to ${ join ( worktreePath , 'claude-prompt.txt' ) } ` ) ;
356401
357- // Use Claude in print mode (-p) which is better for programmatic usage
358- const claude = spawn ( 'claude' , [ '-p' , claudePrompt ] , {
359- cwd : worktreePath ,
360- stdio : 'pipe' ,
361- env : process . env // Pass full environment including auth configs
362- } ) ;
363-
364- let claudeOutput = '' ;
365- let claudeError = '' ;
402+ // Invoke Claude for actual CI analysis
403+ log ( `Starting CI failure analysis for ${ workflowName } ...` ) ;
366404
367- claude . stdout . on ( 'data' , ( data ) => {
368- const chunk = data . toString ( ) ;
369- claudeOutput += chunk ;
370- log ( `DEBUG: Claude stdout chunk: ${ chunk . slice ( 0 , 200 ) } ...` ) ;
371- } ) ;
372-
373- claude . stderr . on ( 'data' , ( data ) => {
374- const chunk = data . toString ( ) ;
375- claudeError += chunk ;
376- log ( `DEBUG: Claude stderr chunk: ${ chunk . slice ( 0 , 200 ) } ...` ) ;
377- } ) ;
378-
379- // Wait for Claude to complete with timeout
380- const result = await new Promise < number > ( ( resolve , reject ) => {
381- const timeout = setTimeout ( ( ) => {
382- claude . kill ( ) ;
383- reject ( new Error ( 'Claude analysis timed out' ) ) ;
384- } , CONFIG . CLAUDE_TIMEOUT ) ;
405+ try {
406+ // Use Claude CLI to analyze the failure with all collected data
407+ const analysisResult = execSync ( `claude -p "${ claudePrompt . replace ( / " / g, '\\"' ) } "` , {
408+ encoding : 'utf8' ,
409+ cwd : worktreePath ,
410+ env : { ...process . env } ,
411+ stdio : 'pipe' ,
412+ timeout : CONFIG . CLAUDE_TIMEOUT
413+ } ) . trim ( ) ;
385414
386- claude . on ( 'close' , ( code ) => {
387- clearTimeout ( timeout ) ;
388- resolve ( code || 0 ) ;
389- } ) ;
415+ log ( `Claude analysis completed for ${ workflowName } ` ) ;
416+ log ( `Analysis result length: ${ analysisResult . length } characters` ) ;
417+
418+ // Write the analysis result to a file
419+ const analysisFile = join ( worktreePath , 'cron' , 'reports' , `claude-analysis-${ cleanWorkflow } -${ CONFIG . DATE_STAMP } .md` ) ;
420+ writeFileSync ( analysisFile , analysisResult ) ;
421+ log ( `Analysis written to: ${ analysisFile } ` ) ;
390422
391- claude . on ( 'error' , ( error ) => {
392- clearTimeout ( timeout ) ;
393- reject ( error ) ;
394- } ) ;
395- } ) ;
396-
397- if ( result === 0 ) {
398- log ( `Claude analysis completed successfully for ${ workflowName } ` ) ;
399423 return true ;
400- } else {
401- log ( `ERROR: Claude analysis failed for ${ workflowName } with exit code ${ result } ` ) ;
402- if ( claudeError ) {
403- log ( `Claude stderr: ${ claudeError } ` ) ;
424+ } catch ( claudeError : any ) {
425+ log ( `ERROR: Claude analysis failed: ${ claudeError . message } ` ) ;
426+ if ( claudeError . stderr ) {
427+ log ( `ERROR: Claude stderr: ${ claudeError . stderr } ` ) ;
404428 }
405- if ( claudeOutput ) {
406- log ( `Claude stdout: ${ claudeOutput } ` ) ;
429+ if ( claudeError . stdout ) {
430+ log ( `ERROR: Claude stdout: ${ claudeError . stdout } ` ) ;
407431 }
408432 return false ;
409433 }
410434 } catch ( error : any ) {
411- if ( error . message . includes ( 'timed out' ) ) {
412- log ( `WARNING: Claude analysis timed out for ${ workflowName } after ${ CONFIG . CLAUDE_TIMEOUT / 1000 } s` ) ;
413- } else {
414- log ( `ERROR: Claude analysis failed for ${ workflowName } : ${ error . message } ` ) ;
415- }
435+ log ( `ERROR: Claude analysis failed for ${ workflowName } : ${ error . message } ` ) ;
416436 return false ;
417437 }
418438}
@@ -495,6 +515,15 @@ async function main(): Promise<void> {
495515
496516 log ( `Nightly CI check completed. Processed ${ failureCount } failures, ${ successCount } successful analyses.` ) ;
497517 log ( `See full log at: ${ CONFIG . LOG_FILE } ` ) ;
518+
519+ if ( successCount > 0 ) {
520+ log ( `Analysis reports generated in worktree directories:` ) ;
521+ for ( const failure of failures ) {
522+ const cleanWorkflow = cleanWorkflowName ( failure . workflowName ) ;
523+ const worktreePath = join ( '..' , `cc-resolve-${ CONFIG . DATE_STAMP } -${ cleanWorkflow } ` ) ;
524+ log ( ` - ${ worktreePath } /cron/reports/` ) ;
525+ }
526+ }
498527 } catch ( error : any ) {
499528 log ( `ERROR: Main execution failed: ${ error . message } ` ) ;
500529 process . exit ( 1 ) ;
0 commit comments