MCP · copy
hub / github.com/wshobson/agents / to_markdown

Method to_markdown

plugins/plugin-eval/src/plugin_eval/reporter.py:35–160  ·  view source on GitHub ↗
(self, result: PluginEvalResult)

Source from the content-addressed store, hash-verified

33 # ------------------------------------------------------------------
34
35 def to_markdown(self, result: PluginEvalResult) -> str:
36 lines: list[str] = []
37
38 lines.append("# PluginEval Report")
39 lines.append("")
40 lines.append(f"**Path:** `{result.plugin_path}`")
41 lines.append(f"**Timestamp:** {result.timestamp}")
42 requested = Depth(result.config.depth)
43 effective = _effective_depth(result)
44 if effective is requested:
45 lines.append(f"**Depth:** {requested.value}")
46 else:
47 lines.append(
48 f"**Depth:** {requested.value} (requested) → {effective.value} (effective)"
49 )
50 lines.append("")
51
52 if effective is not requested:
53 lines.append(
54 "> **Note:** Requested depth `"
55 f"{requested.value}` was downgraded to `{effective.value}` "
56 "because plugin-level evaluation only runs the static layer. "
57 "Judge and Monte Carlo layers require per-skill evaluation — "
58 "point at an individual skill directory to use the deeper "
59 "layers. Composite score and confidence reflect the layers "
60 "actually run."
61 )
62 lines.append("")
63
64 # Overall Score
65 lines.append("## Overall Score")
66 lines.append("")
67 if result.composite:
68 c = result.composite
69 score_str = f"{c.score:.1f}/100"
70 badge_str = c.badge.value.replace("_", " ").title()
71 lines.append("| Metric | Value |")
72 lines.append("|--------|-------|")
73 lines.append(f"| Score | **{score_str}** |")
74 lines.append(f"| Confidence | {c.confidence_label} |")
75 lines.append(f"| Badge | {badge_str} |")
76 if c.ci_lower is not None and c.ci_upper is not None:
77 lines.append(f"| 95% CI | [{c.ci_lower:.1f}, {c.ci_upper:.1f}] |")
78 if c.anti_pattern_penalty < 1.0:
79 penalty_pct = (1.0 - c.anti_pattern_penalty) * 100
80 lines.append(f"| Anti-Pattern Penalty | -{penalty_pct:.0f}% |")
81 else:
82 lines.append("_No composite score available._")
83 lines.append("")
84
85 # Elo Rating (if present)
86 if result.elo:
87 elo = result.elo
88 lines.append("## Elo Rating")
89 lines.append("")
90 lines.append("| Metric | Value |")
91 lines.append("|--------|-------|")
92 lines.append(f"| Rating | {elo.rating:.0f} |")

Calls 2

Depth · Class · 0.90
_effective_depth · Function · 0.85