MCPcopy
hub / github.com/wshobson/agents / evaluate_plugin

Method evaluate_plugin

plugins/plugin-eval/src/plugin_eval/engine.py:129–159  ·  view source on GitHub ↗

Run evaluation on an entire plugin directory (all skills + agents). Note: Plugin-level evaluation currently only runs Layer 1 (static). Judge and Monte Carlo require per-skill evaluation. The confidence label is always "Estimated" regardless of requested depth.

(self, plugin_dir: Path)

Source from the content-addressed store, hash-verified

127 )
128
def evaluate_plugin(self, plugin_dir: Path) -> PluginEvalResult:
    """Evaluate a whole plugin directory (all skills + agents) in one pass.

    Only Layer 1 (static analysis) runs at plugin level; Judge and Monte
    Carlo require per-skill evaluation. Consequently the composite's
    confidence label is always "Estimated", whatever depth was requested.

    Args:
        plugin_dir: The plugin directory to evaluate.

    Returns:
        A ``PluginEvalResult`` carrying the static layer result and the
        composite assembled from it.
    """
    # Layer 1: static analysis over the whole plugin.
    static_layer = self._static.analyze_plugin(plugin_dir)

    # The plugin-level static result has no per-dimension breakdown, so the
    # overall static score stands in for every static-measurable dimension.
    overall_score = static_layer.score
    scores_by_dimension = dict.fromkeys(
        STATIC_TO_DIMENSION.values(), overall_score
    )
    composite = self._assemble_composite(
        scores_by_dimension,
        len(static_layer.anti_patterns),
    )

    # Only static data is available here, so pin the label to the QUICK
    # depth's value instead of honoring the requested depth (judge/MC are
    # per-skill only).
    composite.confidence_label = Depth.QUICK.confidence_label

    return PluginEvalResult(
        plugin_path=str(plugin_dir),
        timestamp=datetime.now(UTC).isoformat(),
        config=self.config,
        layers=[static_layer],
        composite=composite,
    )
160
161 # ------------------------------------------------------------------
162 # Composite construction

Calls 3

_assemble_compositeMethod · 0.95
PluginEvalResultClass · 0.90
analyze_pluginMethod · 0.80