hub / github.com/wshobson/agents / to_markdown

Method to_markdown

plugins/plugin-eval/src/plugin_eval/reporter.py:35–160 · view source on GitHub ↗

(self, result: PluginEvalResult)

Source from the content-addressed store, hash-verified

33	# ------------------------------------------------------------------
34
35	def to_markdown(self, result: PluginEvalResult) -> str:
36	lines: list[str] = []
37
38	lines.append("# PluginEval Report")
39	lines.append("")
40	lines.append(f"Path: `{result.plugin_path}`")
41	lines.append(f"Timestamp: {result.timestamp}")
42	requested = Depth(result.config.depth)
43	effective = _effective_depth(result)
44	if effective is requested:
45	lines.append(f"Depth: {requested.value}")
46	else:
47	lines.append(
48	f"Depth: {requested.value} (requested) → {effective.value} (effective)"
49	)
50	lines.append("")
51
52	if effective is not requested:
53	lines.append(
54	"> Note: Requested depth `"
55	f"{requested.value}` was downgraded to `{effective.value}` "
56	"because plugin-level evaluation only runs the static layer. "
57	"Judge and Monte Carlo layers require per-skill evaluation — "
58	"point at an individual skill directory to use the deeper "
59	"layers. Composite score and confidence reflect the layers "
60	"actually run."
61	)
62	lines.append("")
63
64	# Overall Score
65	lines.append("## Overall Score")
66	lines.append("")
67	if result.composite:
68	c = result.composite
69	score_str = f"{c.score:.1f}/100"
70	badge_str = c.badge.value.replace("_", " ").title()
71	lines.append("\| Metric \| Value \|")
72	lines.append("\|--------\|-------\|")
73	lines.append(f"\| Score \| {score_str} \|")
74	lines.append(f"\| Confidence \| {c.confidence_label} \|")
75	lines.append(f"\| Badge \| {badge_str} \|")
76	if c.ci_lower is not None and c.ci_upper is not None:
77	lines.append(f"\| 95% CI \| [{c.ci_lower:.1f}, {c.ci_upper:.1f}] \|")
78	if c.anti_pattern_penalty < 1.0:
79	penalty_pct = (1.0 - c.anti_pattern_penalty) * 100
80	lines.append(f"\| Anti-Pattern Penalty \| -{penalty_pct:.0f}% \|")
81	else:
82	lines.append("_No composite score available._")
83	lines.append("")
84
85	# Elo Rating (if present)
86	if result.elo:
87	elo = result.elo
88	lines.append("## Elo Rating")
89	lines.append("")
90	lines.append("\| Metric \| Value \|")
91	lines.append("\|--------\|-------\|")
92	lines.append(f"\| Rating \| {elo.rating:.0f} \|")

Callers 6

_run_score · Function · 0.95

to_html · Method · 0.95

test_markdown_output · Method · 0.95

test_markdown_shows_no_warning_when_depth_was_honored · Method · 0.80

test_markdown_shows_warning_when_plugin_eval_downgrades_depth · Method · 0.80

test_markdown_shows_warning_when_standard_depth_is_downgraded · Method · 0.80

Calls 2

Depth · Class · 0.90

_effective_depth · Function · 0.85

Tested by 4

test_markdown_output · Method · 0.76

test_markdown_shows_no_warning_when_depth_was_honored · Method · 0.64

test_markdown_shows_warning_when_plugin_eval_downgrades_depth · Method · 0.64

test_markdown_shows_warning_when_standard_depth_is_downgraded · Method · 0.64