Skip to content

Commit 58335d1

Browse files
committed
update - park
1 parent cf6d53e commit 58335d1

File tree

1 file changed

+147
-118
lines changed

1 file changed

+147
-118
lines changed

cron/nightly-ci-check.ts

Lines changed: 147 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -77,56 +77,61 @@ function cleanWorkflowName(name: string): string {
7777
async function getScheduledFailures(): Promise<FailureInfo[]> {
7878
log("Checking for scheduled workflow failures...");
7979

80-
// Get recent scheduled runs
81-
const runsJson = execCommand(
82-
'gh run list --limit 50 --json status,conclusion,workflowName,createdAt,headSha,url,event,databaseId',
83-
{ silent: true }
84-
);
85-
86-
const allRuns: WorkflowRun[] = JSON.parse(runsJson);
87-
const scheduledRuns = allRuns.filter(run => run.event === 'schedule');
88-
89-
log(`DEBUG: Found ${scheduledRuns.length} scheduled runs`);
80+
try {
81+
// Get recent scheduled runs
82+
const runsJson = execCommand(
83+
'gh run list --limit 50 --json status,conclusion,workflowName,createdAt,headSha,url,event,databaseId',
84+
{ silent: true }
85+
);
86+
87+
const allRuns: WorkflowRun[] = JSON.parse(runsJson);
88+
const scheduledRuns = allRuns.filter(run => run.event === 'schedule');
9089

91-
// Group by workflow and find most recent run per workflow
92-
const workflowGroups = new Map<string, WorkflowRun[]>();
93-
scheduledRuns.forEach(run => {
94-
if (!workflowGroups.has(run.workflowName)) {
95-
workflowGroups.set(run.workflowName, []);
90+
log(`DEBUG: Found ${scheduledRuns.length} scheduled runs`);
91+
92+
// Group by workflow and find most recent run per workflow
93+
const workflowGroups = new Map<string, WorkflowRun[]>();
94+
scheduledRuns.forEach(run => {
95+
if (!workflowGroups.has(run.workflowName)) {
96+
workflowGroups.set(run.workflowName, []);
97+
}
98+
workflowGroups.get(run.workflowName)!.push(run);
99+
});
100+
101+
const failures: FailureInfo[] = [];
102+
103+
for (const [workflowName, runs] of workflowGroups) {
104+
// Sort by creation date (newest first)
105+
runs.sort((a, b) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime());
106+
const mostRecent = runs[0];
107+
108+
log(`DEBUG: Processing workflow: ${workflowName}`);
109+
log(`DEBUG: Latest run has conclusion: ${mostRecent.conclusion}`);
110+
111+
if (mostRecent.conclusion === 'failure' || mostRecent.conclusion === 'cancelled') {
112+
log(`DEBUG: Adding ${workflowName} to failures list`);
113+
failures.push({
114+
runId: mostRecent.databaseId,
115+
workflowName: mostRecent.workflowName,
116+
headSha: mostRecent.headSha,
117+
url: mostRecent.url
118+
});
119+
}
96120
}
97-
workflowGroups.get(run.workflowName)!.push(run);
98-
});
99-
100-
const failures: FailureInfo[] = [];
101-
102-
for (const [workflowName, runs] of workflowGroups) {
103-
// Sort by creation date (newest first)
104-
runs.sort((a, b) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime());
105-
const mostRecent = runs[0];
106121

107-
log(`DEBUG: Processing workflow: ${workflowName}`);
108-
log(`DEBUG: Latest run has conclusion: ${mostRecent.conclusion}`);
122+
log(`DEBUG: Final failures list has ${failures.length} entries`);
109123

110-
if (mostRecent.conclusion === 'failure' || mostRecent.conclusion === 'cancelled') {
111-
log(`DEBUG: Adding ${workflowName} to failures list`);
112-
failures.push({
113-
runId: mostRecent.databaseId,
114-
workflowName: mostRecent.workflowName,
115-
headSha: mostRecent.headSha,
116-
url: mostRecent.url
117-
});
124+
if (failures.length === 0) {
125+
log("No unresolved scheduled workflow failures found");
126+
return [];
118127
}
119-
}
120-
121-
log(`DEBUG: Final failures list has ${failures.length} entries`);
122-
123-
if (failures.length === 0) {
124-
log("No unresolved scheduled workflow failures found");
128+
129+
log(`Found ${failures.length} unresolved scheduled workflow failures`);
130+
return failures;
131+
} catch (error: any) {
132+
log(`ERROR: Failed to get scheduled failures: ${error.message}`);
125133
return [];
126134
}
127-
128-
log(`Found ${failures.length} unresolved scheduled workflow failures`);
129-
return failures;
130135
}
131136

132137
// Setup worktree for a specific failure
@@ -139,7 +144,15 @@ async function setupWorktreeForFailure(runId: number, workflowName: string): Pro
139144
try {
140145
// Create worktree using git worktree command directly (force override if exists)
141146
const worktreePath = join('..', worktreeName);
142-
execCommand(`git worktree add -f "${worktreePath}"`, { silent: true });
147+
148+
// Remove existing worktree if it exists
149+
try {
150+
execCommand(`git worktree remove -f "${worktreePath}"`, { silent: true });
151+
} catch (e) {
152+
// Ignore errors if worktree doesn't exist
153+
}
154+
155+
execCommand(`git worktree add "${worktreePath}"`, { silent: true });
143156

144157
// Create reports directory in worktree
145158
mkdirSync(join(worktreePath, 'cron', 'reports'), { recursive: true });
@@ -317,102 +330,109 @@ async function invokeClaudeAnalysis(workflowName: string, worktreePath: string):
317330
const cleanWorkflow = cleanWorkflowName(workflowName);
318331
log(`Invoking Claude analysis for ${workflowName} in ${worktreePath}`);
319332

320-
const claudePrompt = `# Automated CI Failure Analysis
333+
const claudePrompt = `# CI Failure Analysis Task
334+
335+
You are in a dedicated worktree for analyzing a GitHub Actions workflow failure.
336+
337+
## Your Task
338+
339+
1. **Analyze the failure data** in the cron/reports/ directory
340+
2. **Identify the root cause** of the ${workflowName} workflow failure
341+
3. **Create a detailed assessment** report
342+
4. **Suggest concrete fixes** if the confidence level is high
343+
5. **Write your findings** to a file called "analysis-${cleanWorkflow}-${CONFIG.DATE_STAMP}.md"
344+
345+
## Available Data
346+
347+
The cron/reports/ directory contains:
348+
- failure-logs-*.txt: Full workflow failure logs
349+
- failure-details-*.json: Structured failure information
350+
- commit-range-*.txt: Commits between last success and failure
351+
- detailed-commits-*.txt: Full commit messages and details
352+
- diff-stat-*.txt: Files changed summary
353+
- full-diff-*.txt: Complete code changes
354+
- pr-info-*.json: Related pull request information
355+
- metadata-*.json: Run metadata
321356
322-
You are analyzing a CI failure in a dedicated worktree. The failure data has been pre-collected in ./cron/reports/.
357+
## Analysis Framework
323358
324-
Your task is to:
325-
1. **Analyze the failure** using the pre-collected data:
326-
- Read failure-logs-${cleanWorkflow}-${CONFIG.DATE_STAMP}.txt for error details
327-
- Read failure-details-${cleanWorkflow}-${CONFIG.DATE_STAMP}.json for run metadata
328-
- Read commit-range-${cleanWorkflow}-${CONFIG.DATE_STAMP}.txt for basic commit list
329-
- Read detailed-commits-${cleanWorkflow}-${CONFIG.DATE_STAMP}.txt for full commit messages
330-
- Read diff-stat-${cleanWorkflow}-${CONFIG.DATE_STAMP}.txt for file change summary
331-
- Read diff-name-status-${cleanWorkflow}-${CONFIG.DATE_STAMP}.txt for specific file changes
332-
- Read full-diff-${cleanWorkflow}-${CONFIG.DATE_STAMP}.txt for complete code changes
333-
- Read pr-info-${cleanWorkflow}-${CONFIG.DATE_STAMP}.json for PR context
334-
- Read last-good-run-${cleanWorkflow}-${CONFIG.DATE_STAMP}.txt for baseline reference
359+
Please structure your analysis as follows:
335360
336-
2. **Perform root cause analysis** and assess fix confidence (0-100%):
337-
- **High confidence (80-100%)**: Simple dependency updates, lint fixes, obvious typos
338-
- **Medium confidence (60-79%)**: Test failures with clear fixes, build config issues
339-
- **Low confidence (0-59%)**: Complex logic errors, environmental issues
361+
### 1. Executive Summary
362+
- Brief description of the failure
363+
- Impact assessment
364+
- Confidence level in diagnosis (High/Medium/Low)
340365
341-
3. **Create analysis report** in ./cron/reports/:
342-
- **Always create**: assessment-${cleanWorkflow}-${CONFIG.DATE_STAMP}.md
343-
- **If attempting fix**: resolution-${cleanWorkflow}-${CONFIG.DATE_STAMP}.md
344-
- **If deferring**: status-${cleanWorkflow}-${CONFIG.DATE_STAMP}.md
366+
### 2. Root Cause Analysis
367+
- Primary cause of failure
368+
- Contributing factors
369+
- Timeline of events
345370
346-
4. **If confidence >75%**: Implement fix, test, and commit with clear message
371+
### 3. Code Analysis
372+
- Specific changes that triggered the failure
373+
- Code quality issues identified
374+
- Test coverage gaps
347375
348-
**Important**: Work only with the pre-collected data. Focus on analysis and solution, not data gathering.
376+
### 4. Recommendations
377+
- Immediate fixes needed
378+
- Long-term improvements
379+
- Prevention strategies
349380
350-
Begin analysis now.`;
381+
### 5. Implementation Plan
382+
- Step-by-step fix instructions
383+
- Testing recommendations
384+
- Risk assessment
385+
386+
Focus on actionable insights that will help prevent similar failures.`;
351387

352388
try {
389+
// Test write permissions in worktree
390+
try {
391+
writeFileSync(join(worktreePath, 'write-test.tmp'), 'test');
392+
execCommand(`rm -f "${join(worktreePath, 'write-test.tmp')}"`, { silent: true });
393+
log(`DEBUG: Write permissions confirmed in ${worktreePath}`);
394+
} catch (permError) {
395+
log(`WARNING: Write permission issues in ${worktreePath}: ${permError}`);
396+
}
397+
353398
// Write prompt to file for debugging
354399
writeFileSync(join(worktreePath, 'claude-prompt.txt'), claudePrompt);
355400
log(`DEBUG: Wrote Claude prompt to ${join(worktreePath, 'claude-prompt.txt')}`);
356401

357-
// Use Claude in print mode (-p) which is better for programmatic usage
358-
const claude = spawn('claude', ['-p', claudePrompt], {
359-
cwd: worktreePath,
360-
stdio: 'pipe',
361-
env: process.env // Pass full environment including auth configs
362-
});
363-
364-
let claudeOutput = '';
365-
let claudeError = '';
402+
// Invoke Claude for actual CI analysis
403+
log(`Starting CI failure analysis for ${workflowName}...`);
366404

367-
claude.stdout.on('data', (data) => {
368-
const chunk = data.toString();
369-
claudeOutput += chunk;
370-
log(`DEBUG: Claude stdout chunk: ${chunk.slice(0, 200)}...`);
371-
});
372-
373-
claude.stderr.on('data', (data) => {
374-
const chunk = data.toString();
375-
claudeError += chunk;
376-
log(`DEBUG: Claude stderr chunk: ${chunk.slice(0, 200)}...`);
377-
});
378-
379-
// Wait for Claude to complete with timeout
380-
const result = await new Promise<number>((resolve, reject) => {
381-
const timeout = setTimeout(() => {
382-
claude.kill();
383-
reject(new Error('Claude analysis timed out'));
384-
}, CONFIG.CLAUDE_TIMEOUT);
405+
try {
406+
// Use Claude CLI to analyze the failure with all collected data
407+
const analysisResult = execSync(`claude -p "${claudePrompt.replace(/"/g, '\\"')}"`, {
408+
encoding: 'utf8',
409+
cwd: worktreePath,
410+
env: { ...process.env },
411+
stdio: 'pipe',
412+
timeout: CONFIG.CLAUDE_TIMEOUT
413+
}).trim();
385414

386-
claude.on('close', (code) => {
387-
clearTimeout(timeout);
388-
resolve(code || 0);
389-
});
415+
log(`Claude analysis completed for ${workflowName}`);
416+
log(`Analysis result length: ${analysisResult.length} characters`);
417+
418+
// Write the analysis result to a file
419+
const analysisFile = join(worktreePath, 'cron', 'reports', `claude-analysis-${cleanWorkflow}-${CONFIG.DATE_STAMP}.md`);
420+
writeFileSync(analysisFile, analysisResult);
421+
log(`Analysis written to: ${analysisFile}`);
390422

391-
claude.on('error', (error) => {
392-
clearTimeout(timeout);
393-
reject(error);
394-
});
395-
});
396-
397-
if (result === 0) {
398-
log(`Claude analysis completed successfully for ${workflowName}`);
399423
return true;
400-
} else {
401-
log(`ERROR: Claude analysis failed for ${workflowName} with exit code ${result}`);
402-
if (claudeError) {
403-
log(`Claude stderr: ${claudeError}`);
424+
} catch (claudeError: any) {
425+
log(`ERROR: Claude analysis failed: ${claudeError.message}`);
426+
if (claudeError.stderr) {
427+
log(`ERROR: Claude stderr: ${claudeError.stderr}`);
404428
}
405-
if (claudeOutput) {
406-
log(`Claude stdout: ${claudeOutput}`);
429+
if (claudeError.stdout) {
430+
log(`ERROR: Claude stdout: ${claudeError.stdout}`);
407431
}
408432
return false;
409433
}
410434
} catch (error: any) {
411-
if (error.message.includes('timed out')) {
412-
log(`WARNING: Claude analysis timed out for ${workflowName} after ${CONFIG.CLAUDE_TIMEOUT / 1000}s`);
413-
} else {
414-
log(`ERROR: Claude analysis failed for ${workflowName}: ${error.message}`);
415-
}
435+
log(`ERROR: Claude analysis failed for ${workflowName}: ${error.message}`);
416436
return false;
417437
}
418438
}
@@ -495,6 +515,15 @@ async function main(): Promise<void> {
495515

496516
log(`Nightly CI check completed. Processed ${failureCount} failures, ${successCount} successful analyses.`);
497517
log(`See full log at: ${CONFIG.LOG_FILE}`);
518+
519+
if (successCount > 0) {
520+
log(`Analysis reports generated in worktree directories:`);
521+
for (const failure of failures) {
522+
const cleanWorkflow = cleanWorkflowName(failure.workflowName);
523+
const worktreePath = join('..', `cc-resolve-${CONFIG.DATE_STAMP}-${cleanWorkflow}`);
524+
log(` - ${worktreePath}/cron/reports/`);
525+
}
526+
}
498527
} catch (error: any) {
499528
log(`ERROR: Main execution failed: ${error.message}`);
500529
process.exit(1);

0 commit comments

Comments
 (0)