Skip to content

Commit 161ecdd

Browse files
committed
evals: print all scores
1 parent d067a1b commit 161ecdd

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

evals/buffbench/run-buffbench.ts

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -362,5 +362,16 @@ export async function runBuffBench(options: {
362 362
)
363 363
}
364 364

365+
// Print all overall scores for distribution analysis
366+
console.log('\n=== Score Distribution ===')
367+
for (const [agentId, data] of Object.entries(results)) {
368+
const validRuns = data.runs.filter(
369+
(r) => !commitShasWithErrors.has(r.commitSha),
370+
)
371+
const scores = validRuns.map((r) => r.judging.overallScore.toFixed(1))
372+
console.log(`\n${agentId}:`)
373+
console.log(` Scores: ${scores.join(', ')}`)
374+
}
375+
365 376
return finalResults
366 377
}

0 commit comments

Comments
 (0)