MCPcopy
hub / github.com/IBM/AssetOpsBench / build_report

Function build_report

src/evaluation/report.py:106–138  ·  view source on GitHub ↗
(results: list[ScenarioResult])

Source from the content-addressed store, hash-verified

104
105
def build_report(results: list[ScenarioResult]) -> EvalReport:
    """Roll a list of scenario results up into a single ``EvalReport``.

    The report carries overall totals, a per-scenario-type breakdown, the
    distinct runners and models seen (sorted), and whatever
    ``aggregate_ops`` and ``_aggregate_score_summary`` return for the same
    list.  The ``results`` list itself is attached to the report unchanged.

    Args:
        results: Scenario results to summarise.  Each one is read for
            ``score.passed``, ``scenario_type``, ``runner`` and ``model``.

    Returns:
        An ``EvalReport`` stamped with the current UTC time in ISO-8601
        form.  Pass rates are rounded to four decimal places and are
        ``0.0`` when there is nothing to divide by.
    """

    def _rate(hits: int, count: int) -> float:
        # An empty group has no meaningful ratio; report 0.0 rather than
        # dividing by zero.
        if not count:
            return 0.0
        return round(hits / count, 4)

    def _summarise(members: list[ScenarioResult]) -> TypeBreakdown:
        count = len(members)
        hits = len([m for m in members if m.score.passed])
        return TypeBreakdown(
            total=count,
            passed=hits,
            pass_rate=_rate(hits, count),
        )

    scenario_count = len(results)
    pass_count = len([res for res in results if res.score.passed])

    # Bucket results by scenario type, keeping first-seen order of the
    # types.  A missing or empty type falls into the "unknown" bucket.
    grouped: dict[str, list[ScenarioResult]] = {}
    for res in results:
        key = res.scenario_type or "unknown"
        grouped.setdefault(key, []).append(res)

    per_type: dict[str, TypeBreakdown] = {
        kind: _summarise(members) for kind, members in grouped.items()
    }

    timestamp = _dt.datetime.now(_dt.timezone.utc).isoformat()
    runner_names = {res.runner for res in results}
    model_names = {res.model for res in results}

    # "scored" mirrors "scenarios": every result handed in counts as scored.
    overall = {
        "scenarios": scenario_count,
        "scored": scenario_count,
        "passed": pass_count,
        "pass_rate": _rate(pass_count, scenario_count),
    }

    return EvalReport(
        generated_at=timestamp,
        runners=sorted(runner_names),
        models=sorted(model_names),
        totals=overall,
        by_scenario_type=per_type,
        ops=aggregate_ops(results),
        score_summary=_aggregate_score_summary(results),
        results=results,
    )
139
140
141def write_report(report: EvalReport, output: Path) -> Path:

Calls 4

TypeBreakdown · Class · 0.85
EvalReport · Class · 0.85
aggregate_ops · Function · 0.85
_aggregate_score_summary · Function · 0.85