Function build_report

src/evaluation/report.py:106–138 · view source on GitHub ↗

(results: list[ScenarioResult])

Source from the content-addressed store, hash-verified

104
105
def build_report(results: list[ScenarioResult]) -> EvalReport:
    """Assemble an ``EvalReport`` from a list of scenario results.

    The report contains:

    * ``generated_at`` -- the current UTC time as an ISO-8601 string.
    * ``runners`` / ``models`` -- the distinct ``runner`` and ``model``
      values found on the results, sorted.
    * ``totals`` -- overall counts and pass rate. ``"scored"`` is set to
      the same value as ``"scenarios"`` (the number of results).
    * ``by_scenario_type`` -- one ``TypeBreakdown`` per scenario type;
      results whose ``scenario_type`` is falsy are grouped under
      ``"unknown"``.
    * ``ops`` / ``score_summary`` -- delegated to ``aggregate_ops`` and
      ``_aggregate_score_summary`` respectively.
    * ``results`` -- the input list itself, passed through unchanged.

    Pass rates are rounded to four decimal places and reported as ``0.0``
    when there is nothing to divide by (e.g. an empty ``results`` list).
    """

    def _rate(numerator: int, denominator: int) -> float:
        # Guard against division by zero for empty inputs.
        if not denominator:
            return 0.0
        return round(numerator / denominator, 4)

    scenario_count = len(results)
    pass_count = len([entry for entry in results if entry.score.passed])

    # Bucket the results by scenario type, preserving first-seen order.
    grouped: dict[str, list[ScenarioResult]] = defaultdict(list)
    for entry in results:
        type_key = entry.scenario_type or "unknown"
        grouped[type_key].append(entry)

    per_type: dict[str, TypeBreakdown] = {}
    for type_name, members in grouped.items():
        member_count = len(members)
        member_passes = len([m for m in members if m.score.passed])
        per_type[type_name] = TypeBreakdown(
            total=member_count,
            passed=member_passes,
            pass_rate=_rate(member_passes, member_count),
        )

    timestamp = _dt.datetime.now(_dt.timezone.utc).isoformat()
    runner_names = sorted({entry.runner for entry in results})
    model_names = sorted({entry.model for entry in results})
    overall = {
        "scenarios": scenario_count,
        "scored": scenario_count,
        "passed": pass_count,
        "pass_rate": _rate(pass_count, scenario_count),
    }

    return EvalReport(
        generated_at=timestamp,
        runners=runner_names,
        models=model_names,
        totals=overall,
        by_scenario_type=per_type,
        ops=aggregate_ops(results),
        score_summary=_aggregate_score_summary(results),
        results=results,
    )
139
140
141	def write_report(report: EvalReport, output: Path) -> Path:

test_build_report_totals_and_breakdownFunction · 0.90

test_build_report_handles_emptyFunction · 0.90

test_write_report_round_tripsFunction · 0.90

test_write_reports_dir_per_run_filesFunction · 0.90

test_write_reports_dir_falls_back_to_scenario_idFunction · 0.90

test_render_summary_includes_headlinesFunction · 0.90

test_build_report_includes_score_summaryFunction · 0.90

evaluateMethod · 0.85

TypeBreakdownClass · 0.85

EvalReportClass · 0.85

aggregate_opsFunction · 0.85

_aggregate_score_summaryFunction · 0.85

test_build_report_totals_and_breakdownFunction · 0.72

test_build_report_handles_emptyFunction · 0.72

test_write_report_round_tripsFunction · 0.72

test_write_reports_dir_per_run_filesFunction · 0.72

test_write_reports_dir_falls_back_to_scenario_idFunction · 0.72

test_render_summary_includes_headlinesFunction · 0.72

test_build_report_includes_score_summaryFunction · 0.72