Skip to content

Commit 8ed66f1

Browse files
committed
evals: Allow running a subset of tasks
1 parent 39b6c00 commit 8ed66f1

File tree

2 files changed

+49
-2
lines changed

2 files changed

+49
-2
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import path from 'path'
2+
3+
import { runBuffBench } from './run-buffbench'
4+
5+
/**
 * Runs BuffBench for a single, fixed task.
 *
 * Invokes `runBuffBench` with the `eval-codebuff.json` file located in this
 * module's directory, the `base2-validator` agent, and the
 * `add-deep-thinkers` task ID, then terminates the process with exit code 0
 * once the run has resolved.
 */
async function main() {
  const evalDataPath = path.join(__dirname, 'eval-codebuff.json')
  const agents = ['base2-validator']
  const taskIds = ['add-deep-thinkers']

  await runBuffBench({ evalDataPath, agents, taskIds })

  // Exit explicitly with a success status after the run completes.
  process.exit(0)
}
14+
15+
// Start the run only when `import.meta.main` is truthy.
// NOTE(review): presumably the runtime sets this when the file is executed
// directly rather than imported — confirm for the runtime in use.
if (import.meta.main) {
  // Log any rejection from main() and exit with a failure status.
  const reportAndExit = (error: unknown) => {
    console.error('Error running buffbench:', error)
    process.exit(1)
  }

  main().catch(reportAndExit)
}

evals/buffbench/run-buffbench.ts

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,40 @@ export async function runBuffBench(options: {
169169
agents: string[]
170170
taskConcurrency?: number
171171
client?: CodebuffClient
172+
taskIds?: string[]
172173
}) {
173-
const { evalDataPath, agents, taskConcurrency = 1 } = options
174+
const { evalDataPath, agents, taskConcurrency = 1, taskIds } = options
174175

175176
const evalData: EvalDataV2 = JSON.parse(
176177
fs.readFileSync(evalDataPath, 'utf-8'),
177178
)
178-
const commitsToRun = evalData.evalCommits
179+
180+
let commitsToRun: EvalDataV2['evalCommits']
181+
if (taskIds && taskIds.length > 0) {
182+
const foundCommits: EvalDataV2['evalCommits'] = []
183+
const notFoundIds: string[] = []
184+
185+
for (const taskId of taskIds) {
186+
const foundCommit = evalData.evalCommits.find((c) => c.id === taskId)
187+
if (foundCommit) {
188+
foundCommits.push(foundCommit)
189+
} else {
190+
notFoundIds.push(taskId)
191+
}
192+
}
193+
194+
if (notFoundIds.length > 0) {
195+
const availableIds = evalData.evalCommits.map((c) => c.id).join(', ')
196+
throw new Error(
197+
`Task ID(s) not found: ${notFoundIds.join(', ')}. Available task IDs: ${availableIds}`,
198+
)
199+
}
200+
201+
commitsToRun = foundCommits
202+
console.log(`Running ${foundCommits.length} task(s): ${taskIds.join(', ')}`)
203+
} else {
204+
commitsToRun = evalData.evalCommits
205+
}
179206

180207
const client =
181208
options.client ??

0 commit comments

Comments
 (0)