We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d067a1b, commit 161ecdd (Copy full SHA for 161ecdd)
evals/buffbench/run-buffbench.ts
@@ -362,5 +362,16 @@ export async function runBuffBench(options: {
362
)
363
}
364
365
+ // Print all overall scores for distribution analysis
366
+ console.log('\n=== Score Distribution ===')
367
+ for (const [agentId, data] of Object.entries(results)) {
368
+ const validRuns = data.runs.filter(
369
+ (r) => !commitShasWithErrors.has(r.commitSha),
370
+ )
371
+ const scores = validRuns.map((r) => r.judging.overallScore.toFixed(1))
372
+ console.log(`\n${agentId}:`)
373
+ console.log(` Scores: ${scores.join(', ')}`)
374
+ }
375
+
376
return finalResults
377
0 commit comments