Converts a PluginEvalResult into various output formats.
| 18 | |
| 19 | |
| 20 | class Reporter: |
| 21 | """Converts a PluginEvalResult into various output formats.""" |
| 22 | |
| 23 | # ------------------------------------------------------------------ |
| 24 | # JSON |
| 25 | # ------------------------------------------------------------------ |
| 26 | |
def to_json(self, result: PluginEvalResult) -> str:
    """Serialize the complete evaluation result as indented JSON text.

    Delegates to ``result.model_dump_json`` with a two-space indent and
    returns the string that call produces, unchanged.
    """
    json_indent = 2
    serialized = result.model_dump_json(indent=json_indent)
    return serialized
| 30 | |
| 31 | # ------------------------------------------------------------------ |
| 32 | # Markdown |
| 33 | # ------------------------------------------------------------------ |
| 34 | |
| 35 | def to_markdown(self, result: PluginEvalResult) -> str: |
| 36 | lines: list[str] = [] |
| 37 | |
| 38 | lines.append("# PluginEval Report") |
| 39 | lines.append("") |
| 40 | lines.append(f"**Path:** `{result.plugin_path}`") |
| 41 | lines.append(f"**Timestamp:** {result.timestamp}") |
| 42 | requested = Depth(result.config.depth) |
| 43 | effective = _effective_depth(result) |
| 44 | if effective is requested: |
| 45 | lines.append(f"**Depth:** {requested.value}") |
| 46 | else: |
| 47 | lines.append( |
| 48 | f"**Depth:** {requested.value} (requested) → {effective.value} (effective)" |
| 49 | ) |
| 50 | lines.append("") |
| 51 | |
| 52 | if effective is not requested: |
| 53 | lines.append( |
| 54 | "> **Note:** Requested depth `" |
| 55 | f"{requested.value}` was downgraded to `{effective.value}` " |
| 56 | "because plugin-level evaluation only runs the static layer. " |
| 57 | "Judge and Monte Carlo layers require per-skill evaluation — " |
| 58 | "point at an individual skill directory to use the deeper " |
| 59 | "layers. Composite score and confidence reflect the layers " |
| 60 | "actually run." |
| 61 | ) |
| 62 | lines.append("") |
| 63 | |
| 64 | # Overall Score |
| 65 | lines.append("## Overall Score") |
| 66 | lines.append("") |
| 67 | if result.composite: |
| 68 | c = result.composite |
| 69 | score_str = f"{c.score:.1f}/100" |
| 70 | badge_str = c.badge.value.replace("_", " ").title() |
| 71 | lines.append("| Metric | Value |") |
| 72 | lines.append("|--------|-------|") |
| 73 | lines.append(f"| Score | **{score_str}** |") |
| 74 | lines.append(f"| Confidence | {c.confidence_label} |") |
| 75 | lines.append(f"| Badge | {badge_str} |") |
| 76 | if c.ci_lower is not None and c.ci_upper is not None: |
| 77 | lines.append(f"| 95% CI | [{c.ci_lower:.1f}, {c.ci_upper:.1f}] |") |
no outgoing calls