(rows: Array<EvalRow>, allLogs: Array<RunLogPayload>)
| 450 | } |
| 451 | |
/**
 * Fills in the derived fields of every evaluation row, mutating `rows` in place.
 *
 * Order of work:
 *  1. `applyStability(rows, allLogs)` runs first (presumably this is what
 *     populates `stabilityTier`, which is read below; confirm against its
 *     definition).
 *  2. `bucketedTiers` is called twice: once keyed on `durationMs`, writing
 *     `speedTier`, and once keyed on `tokenEfficiency`, writing
 *     `tokenEfficiencyTier`.
 *  3. For each row that has a `judge` result, `computeStarRating` is called and
 *     its `stars` / `weightedScore` are copied onto the row. Rows without a
 *     `judge` are left untouched by this step.
 *
 * @param rows - Rows to finalize; updated in place.
 * @param allLogs - Run logs, passed through to `applyStability`.
 */
function finalizeRows(rows: Array<EvalRow>, allLogs: Array<RunLogPayload>) {
  applyStability(rows, allLogs)

  // NOTE(review): the trailing `true` is interpreted by bucketedTiers, which is
  // not visible here; the same flag value is used for both metrics.
  bucketedTiers(
    rows,
    (entry) => entry.durationMs,
    (entry, bucket) => {
      entry.speedTier = bucket
    },
    true,
  )
  bucketedTiers(
    rows,
    (entry) => entry.tokenEfficiency,
    (entry, bucket) => {
      entry.tokenEfficiencyTier = bucket
    },
    true,
  )

  // Star ratings need both the judge scores and the tiers assigned above.
  rows.forEach((entry) => {
    const verdict = entry.judge
    if (!verdict) return

    const { stars, weightedScore } = computeStarRating({
      accuracy: verdict.accuracy,
      comprehensiveness: verdict.comprehensiveness,
      typescriptQuality: verdict.typescriptQuality,
      codeModeEfficiency: verdict.codeModeEfficiency,
      speedTier: entry.speedTier,
      tokenEfficiencyTier: entry.tokenEfficiencyTier,
      stabilityTier: entry.stabilityTier,
      compilationFailures: entry.compilationFailures,
      runtimeFailures: entry.runtimeFailures,
      totalExecuteCalls: entry.totalExecuteCalls,
    })
    entry.stars = stars
    entry.weightedScore = weightedScore
  })
}
| 490 | |
| 491 | function listRunLogFiles(): Array<string> { |
| 492 | if (!existsSync(LOG_DIR)) return [] |
no test coverage detected