(options: {
evalDataPaths: string[]
agents: string[]
taskConcurrency?: number
client?: CodebuffClient
taskIds?: string[]
extractLessons?: boolean
disableAnalysis?: boolean
saveTraces?: boolean
})
| 317 | } |
| 318 | |
| 319 | export async function runBuffBench(options: { |
| 320 | evalDataPaths: string[] |
| 321 | agents: string[] |
| 322 | taskConcurrency?: number |
| 323 | client?: CodebuffClient |
| 324 | taskIds?: string[] |
| 325 | extractLessons?: boolean |
| 326 | disableAnalysis?: boolean |
| 327 | saveTraces?: boolean |
| 328 | }) { |
| 329 | const { |
| 330 | evalDataPaths, |
| 331 | agents, |
| 332 | taskConcurrency = 1, |
| 333 | taskIds, |
| 334 | extractLessons = false, |
| 335 | disableAnalysis = false, |
| 336 | saveTraces = false, |
| 337 | } = options |
| 338 | |
| 339 | if (evalDataPaths.length === 0) { |
| 340 | throw new Error('At least one eval data path is required') |
| 341 | } |
| 342 | |
| 343 | // Load all eval files and create a mapping of commits to their source eval data |
| 344 | const allCommitsWithSource: CommitWithSource[] = [] |
| 345 | const loadedEvalFiles: { path: string; data: EvalDataV2 }[] = [] |
| 346 | |
| 347 | for (const evalDataPath of evalDataPaths) { |
| 348 | const evalData: EvalDataV2 = JSON.parse( |
| 349 | fs.readFileSync(evalDataPath, 'utf-8'), |
| 350 | ) |
| 351 | loadedEvalFiles.push({ path: evalDataPath, data: evalData }) |
| 352 | |
| 353 | for (const commit of evalData.evalCommits) { |
| 354 | allCommitsWithSource.push({ |
| 355 | commit, |
| 356 | evalData, |
| 357 | evalDataPath, |
| 358 | }) |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | console.log( |
| 363 | `Loaded ${loadedEvalFiles.length} eval file(s) with ${allCommitsWithSource.length} total tasks`, |
| 364 | ) |
| 365 | for (const { path: p, data } of loadedEvalFiles) { |
| 366 | console.log(` - ${path.basename(p)}: ${data.evalCommits.length} tasks`) |
| 367 | } |
| 368 | |
| 369 | // Collect all unique binInstalls from all eval files |
| 370 | const allBinInstalls = loadedEvalFiles.flatMap( |
| 371 | (f) => f.data.binInstalls ?? [], |
| 372 | ) |
| 373 | const uniqueBinInstalls = allBinInstalls.filter( |
| 374 | (bin, index, self) => index === self.findIndex((b) => b.name === bin.name), |
| 375 | ) |
| 376 |
no test coverage detected