MCPcopy
hub / github.com/wshobson/agents / _run_score

Function _run_score

plugins/plugin-eval/src/plugin_eval/cli.py:32–98  ·  view source on GitHub ↗

Core scoring logic; returns exit code.

(
    path: Path,
    depth: Depth,
    output: str,
    verbose: bool,
    concurrency: int,
    threshold: float | None,
)

Source from the content-addressed store, hash-verified

30
31
32def _run_score(
33 path: Path,
34 depth: Depth,
35 output: str,
36 verbose: bool,
37 concurrency: int,
38 threshold: float | None,
39) -> int:
40 """Core scoring logic; returns exit code."""
41 if not path.exists():
42 console.print(f"[red]Error: Path does not exist: {path}[/red]")
43 raise typer.Exit(code=2)
44
45 config = EvalConfig(
46 depth=depth,
47 output_format=output,
48 verbose=verbose,
49 concurrency=concurrency,
50 )
51 engine = EvalEngine(config)
52
53 target = _detect_target(path)
54 if target == "skill":
55 result = engine.evaluate_skill(path)
56 elif target == "plugin":
57 if depth != Depth.QUICK:
58 stderr_console.print(
59 f"[yellow]warning:[/yellow] plugin-level evaluation only runs the "
60 f"static layer; judge and Monte Carlo layers require per-skill "
61 f"evaluation. Requested depth [bold]{depth.value}[/bold] will be "
62 f"served from the static layer only — confidence label will be "
63 f"[bold]Estimated[/bold] regardless. To use the deeper layers, "
64 f"point at an individual skill directory."
65 )
66 result = engine.evaluate_plugin(path)
67 else:
68 # Attempt skill evaluation as fallback
69 result = engine.evaluate_skill(path)
70
71 reporter = Reporter()
72 if output == "json":
73 typer.echo(reporter.to_json(result))
74 elif output == "html":
75 typer.echo(reporter.to_html(result))
76 else:
77 # Default: markdown
78 typer.echo(reporter.to_markdown(result))
79
80 judge_layer = next((lr for lr in result.layers if lr.layer == "judge"), None)
81 if judge_layer is not None:
82 unmeasured = judge_layer.metadata.get("unmeasured") or []
83 if unmeasured:
84 stderr_console.print(
85 f"[yellow]warning:[/yellow] LLM judge could not measure "
86 f"{', '.join(unmeasured)}; composite computed from the remaining "
87 f"layers. Check that claude-agent-sdk is installed and a model is "
88 f"configured (run with --verbose for details)."
89 )

Callers 2

score · Function · 0.85
certify · Function · 0.85

Calls 9

evaluate_skill · Method · 0.95
evaluate_plugin · Method · 0.95
to_json · Method · 0.95
to_html · Method · 0.95
to_markdown · Method · 0.95
EvalConfig · Class · 0.90
EvalEngine · Class · 0.90
Reporter · Class · 0.90
_detect_target · Function · 0.85

Tested by

no test coverage detected