| 211 | // ─── main ──────────────────────────────────────────────────────────────────── |
| 212 | |
/**
 * CLI entry point: loads a baseline run and a feature run, compares their
 * summaries, and reports the resulting verdict.
 *
 * Exactly two positional arguments are required (baseline first, feature
 * second); each is handed to `loadRun`. When `args.json` is set the report is
 * written to stdout as pretty-printed JSON, otherwise it is passed to
 * `render`.
 *
 * Exit status:
 *   - 0 when help was requested;
 *   - 3 on a usage error (wrong number of positionals) or when `loadRun`
 *     throws for either run;
 *   - otherwise the value `exitCodeFor` returns for the verdict.
 *
 * The status is assigned to `process.exitCode` rather than passed to
 * `process.exit()`: a forced exit may discard stdout/stderr writes that are
 * still pending (where those writes are asynchronous), which could truncate
 * the report. Letting the process end naturally guarantees the output is
 * flushed first.
 */
function main() {
  const args = parseArgs(process.argv);
  if (args.help || args._.length !== 2) {
    usage();
    // Asking for help is a success; a malformed invocation is not.
    process.exitCode = args.help ? 0 : 3;
    return;
  }

  const [baseArg, featArg] = args._;
  let baseRun;
  let featRun;
  try {
    baseRun = loadRun(baseArg);
    featRun = loadRun(featArg);
  } catch (e) {
    console.error(`error: ${e.message}`);
    process.exitCode = 3;
    return;
  }

  const baseSum = summarize(baseRun);
  const featSum = summarize(featRun);
  const moves = classifyTaskMoves(baseSum, featSum);
  const deltaReward = featSum.reward - baseSum.reward;
  const v = verdict(deltaReward, moves, args.threshold);
  const code = exitCodeFor(v);

  if (args.json) {
    const report = {
      verdict: v,
      exitCode: code,
      threshold: args.threshold,
      delta: {
        reward: deltaReward,
        totalMs: featSum.totalMs - baseSum.totalMs,
        totalToolCalls: featSum.totalToolCalls - baseSum.totalToolCalls,
      },
      baseline: {
        path: baseRun.path,
        passed: baseSum.passed,
        total: baseSum.total,
        reward: baseSum.reward,
        totalMs: baseSum.totalMs,
      },
      feature: {
        path: featRun.path,
        passed: featSum.passed,
        total: featSum.total,
        reward: featSum.reward,
        totalMs: featSum.totalMs,
      },
      regressed: moves.hard,
      recovered: moves.recovered,
    };
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
  } else {
    render(baseSum, featSum, moves, v, args.threshold, baseRun.path, featRun.path);
  }

  // Report the verdict through the exit status without cutting off output.
  process.exitCode = code;
}
| 252 | |
// Run the CLI only when this file is executed directly; when it is loaded
// via require() from another module, nothing is run on import.
if (require.main === module) {
  main();
}
| 254 | |