Skip to content

Commit e2082c0

Browse files
committed
Debug error option in agent runner
1 parent 49767f5 commit e2082c0

File tree

3 files changed

+26
-6
lines changed

3 files changed

+26
-6
lines changed

evals/buffbench/agent-runner.ts

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import fs from 'fs'
2+
import path from 'path'
13
import { execSync } from 'child_process'
24

35
import { withTimeout } from '@codebuff/common/util/promise'
@@ -9,6 +11,8 @@ import type { EvalCommitV2 } from './types'
911

1012
export type AgentStep = PrintModeEvent
1113

14+
const DEBUG_ERROR = false
15+
1216
export async function runAgentOnCommit({
1317
client,
1418
agentId,
@@ -66,6 +70,22 @@ export async function runAgentOnCommit({
6670
`[${commit.id}:${agentId}] Error event:`,
6771
event.message,
6872
)
73+
if (DEBUG_ERROR) {
74+
fs.writeFileSync(
75+
path.join(
76+
__dirname,
77+
`${commit.id}-${agentId}-error-${Math.random().toString(36).substring(2, 6)}.json`,
78+
),
79+
JSON.stringify(
80+
{
81+
error: event.message,
82+
trace: trace,
83+
},
84+
null,
85+
2,
86+
),
87+
)
88+
}
6989
}
7090
trace.push(event)
7191
},

evals/buffbench/judge.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -182,8 +182,8 @@ ${error ? `\n## Error Encountered\n${error}` : ''}`
182182
}
183183
},
184184
}),
185-
10 * 60 * 1000,
186-
'Judge agent timed out after 10 minutes',
185+
20 * 60 * 1000,
186+
'Judge agent timed out after 20 minutes',
187187
)
188188

189189
if (judgeResult.output.type !== 'structuredOutput') {

evals/buffbench/trace-analyzer.ts

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
2323
// Handle tool_result events
2424
if (step.type === 'tool_result') {
2525
const output = Array.isArray(step.output) ? step.output : [step.output]
26-
26+
2727
// Truncate read_files results
2828
if (step.toolName === 'read_files') {
2929
const truncatedOutput = output.map((item: any) => {
@@ -96,7 +96,7 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
9696
}
9797
}
9898
}
99-
99+
100100
return step
101101
})
102102
}
@@ -278,8 +278,8 @@ Focus on the HOW, not the WHAT: We want to understand and improve how agents wor
278278
}
279279
},
280280
}),
281-
10 * 60 * 1000,
282-
'Trace analyzer agent timed out after 10 minutes',
281+
20 * 60 * 1000,
282+
'Trace analyzer agent timed out after 20 minutes',
283283
)
284284

285285
const { output } = analyzerResult

0 commit comments

Comments
 (0)