hub / github.com/wshobson/agents / Reporter

Class Reporter

plugins/plugin-eval/src/plugin_eval/reporter.py:20–215 · view source on GitHub ↗

Converts a PluginEvalResult into various output formats.

Source from the content-addressed store, hash-verified

18
19
20	class Reporter:
21	"""Converts a PluginEvalResult into various output formats."""
22
23	# ------------------------------------------------------------------
24	# JSON
25	# ------------------------------------------------------------------
26
27	def to_json(self, result: PluginEvalResult) -> str:
28	"""Return a pretty-printed JSON string of the full result."""
29	return result.model_dump_json(indent=2)
30
31	# ------------------------------------------------------------------
32	# Markdown
33	# ------------------------------------------------------------------
34
35	def to_markdown(self, result: PluginEvalResult) -> str:
36	lines: list[str] = []
37
38	lines.append("# PluginEval Report")
39	lines.append("")
40	lines.append(f"Path: `{result.plugin_path}`")
41	lines.append(f"Timestamp: {result.timestamp}")
42	requested = Depth(result.config.depth)
43	effective = _effective_depth(result)
44	if effective is requested:
45	lines.append(f"Depth: {requested.value}")
46	else:
47	lines.append(
48	f"Depth: {requested.value} (requested) → {effective.value} (effective)"
49	)
50	lines.append("")
51
52	if effective is not requested:
53	lines.append(
54	"> Note: Requested depth `"
55	f"{requested.value}` was downgraded to `{effective.value}` "
56	"because plugin-level evaluation only runs the static layer. "
57	"Judge and Monte Carlo layers require per-skill evaluation — "
58	"point at an individual skill directory to use the deeper "
59	"layers. Composite score and confidence reflect the layers "
60	"actually run."
61	)
62	lines.append("")
63
64	# Overall Score
65	lines.append("## Overall Score")
66	lines.append("")
67	if result.composite:
68	c = result.composite
69	score_str = f"{c.score:.1f}/100"
70	badge_str = c.badge.value.replace("_", " ").title()
71	lines.append("\| Metric \| Value \|")
72	lines.append("\|--------\|-------\|")
73	lines.append(f"\| Score \| {score_str} \|")
74	lines.append(f"\| Confidence \| {c.confidence_label} \|")
75	lines.append(f"\| Badge \| {badge_str} \|")
76	if c.ci_lower is not None and c.ci_upper is not None:
77	lines.append(f"\| 95% CI \| [{c.ci_lower:.1f}, {c.ci_upper:.1f}] \|")

Callers 6

_run_score · Function · 0.90

test_json_output · Method · 0.90

test_markdown_output · Method · 0.90

test_markdown_shows_no_warning_when_depth_was_honored · Method · 0.90

test_markdown_shows_warning_when_plugin_eval_downgrades_depth · Method · 0.90

test_markdown_shows_warning_when_standard_depth_is_downgraded · Method · 0.90

Calls

no outgoing calls

Tested by 5

test_json_output · Method · 0.72

test_markdown_output · Method · 0.72

test_markdown_shows_no_warning_when_depth_was_honored · Method · 0.72

test_markdown_shows_warning_when_plugin_eval_downgrades_depth · Method · 0.72

test_markdown_shows_warning_when_standard_depth_is_downgraded · Method · 0.72