(run)
| 87 | // ─── metric extraction ─────────────────────────────────────────────────────── |
| 88 | |
/**
 * Reduce a run to its aggregate metrics plus a per-task lookup for diffing.
 *
 * Reads `run.data.results` (an array of per-task result records) and, for
 * each record, its `id`, `passed`, `elapsedMs`, `toolCalls`, and
 * `verifyError` fields.
 *
 * @param {{data: {results: Array<object>}}} run - Run whose results are summarized.
 * @returns {{passed: number, total: number, reward: number, totalMs: number,
 *   totalToolCalls: number, byId: Object<string, {passed: boolean,
 *   elapsedMs: number, toolCalls: number, verifyError: *}>}}
 *   `passed` counts records with a truthy `passed`; `total` is the record
 *   count; `reward` is `passed / total` (0 when there are no records);
 *   `totalMs` / `totalToolCalls` are sums with falsy values counted as 0;
 *   `byId` maps each record's `id` to its normalized fields (on duplicate
 *   ids the last record wins).
 */
function summarize(run) {
  const results = run.data.results;
  const total = results.length;

  let passed = 0;
  let totalMs = 0;
  let totalToolCalls = 0;
  const entries = [];

  for (const r of results) {
    if (r.passed) {
      passed += 1;
    }
    totalMs += r.elapsedMs || 0;
    totalToolCalls += r.toolCalls || 0;
    entries.push([
      r.id,
      {
        passed: !!r.passed,
        elapsedMs: r.elapsedMs || 0,
        toolCalls: r.toolCalls || 0,
        verifyError: r.verifyError || null,
      },
    ]);
  }

  // Guard the division so an empty run reports a reward of 0, not NaN.
  const reward = total > 0 ? passed / total : 0;

  // Build the per-task map with Object.fromEntries rather than `obj[id] = …`:
  // it defines own data properties, so an id of "__proto__" is stored as a
  // regular key instead of invoking the inherited prototype setter (which
  // would silently drop the entry and swap the map's prototype).
  const byId = Object.fromEntries(entries);

  return { passed, total, reward, totalMs, totalToolCalls, byId };
}
| 108 | |
| 109 | // ─── verdict ───────────────────────────────────────────────────────────────── |
| 110 |
no outgoing calls
no test coverage detected