Function summarize

bench/diff.js:89–107 · view source on GitHub ↗

(run)

Source from the content-addressed store, hash-verified

87	// ─── metric extraction ───────────────────────────────────────────────────────
88
/**
 * Reduce a run to aggregate metrics plus a per-task lookup keyed by result id.
 *
 * @param {{data: {results: Array<object>}}} run - Run whose `data.results`
 *   array is summarized. Each result is read for `id`, `passed`, `elapsedMs`,
 *   `toolCalls` and `verifyError`.
 * @returns {{passed: number, total: number, reward: number, totalMs: number,
 *   totalToolCalls: number, byId: Object<string, {passed: boolean,
 *   elapsedMs: number, toolCalls: number, verifyError: *}>}}
 *   `reward` is passed / total, or 0 when there are no results.
 */
function summarize(run) {
  const results = run.data.results;
  const total = results.length;
  const passed = results.filter((r) => r.passed).length;
  // Avoid 0 / 0 (NaN) when the run has no results.
  const reward = total > 0 ? passed / total : 0;
  // Missing or falsy timings / tool-call counts contribute 0 to the sums.
  const totalMs = results.reduce((s, r) => s + (r.elapsedMs || 0), 0);
  const totalToolCalls = results.reduce((s, r) => s + (r.toolCalls || 0), 0);
  // Build a per-task pass map keyed by `id` for diffing.
  // Object.fromEntries defines every key as an own data property, so an id
  // such as "__proto__" is stored like any other key instead of hitting the
  // inherited prototype setter (which would silently drop that task).
  // Later results with a duplicate id overwrite earlier ones.
  const byId = Object.fromEntries(
    results.map((r) => [
      r.id,
      {
        passed: !!r.passed,
        elapsedMs: r.elapsedMs || 0,
        toolCalls: r.toolCalls || 0,
        verifyError: r.verifyError || null,
      },
    ]),
  );
  return { passed, total, reward, totalMs, totalToolCalls, byId };
}
108
109	// ─── verdict ─────────────────────────────────────────────────────────────────
110

bench_diff.test.js · File · 0.85

main · Function · 0.85

no outgoing calls

no test coverage detected