MCPcopy
hub / github.com/anthropics/skills / main

Function main

skills/skill-creator/scripts/run_eval.py:259–306  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

257
258
def main():
    """Command-line entry point: run a trigger evaluation and print the report as JSON.

    Parses the command-line options, loads the eval set from ``--eval-set``,
    checks that ``--skill-path`` contains a ``SKILL.md``, and passes everything
    to ``run_eval``. The returned report is written to stdout as indented JSON.
    With ``--verbose``, the description under test and a per-query PASS/FAIL
    summary are additionally written to stderr, keeping stdout pure JSON.

    Exits with status 1, after printing an ``Error: ...`` line to stderr, when
    the eval set cannot be read or parsed or when ``SKILL.md`` is missing.
    """
    parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override description to test")
    parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--model", default=None, help="Model to use for claude -p (default: user's configured model)")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    args = parser.parse_args()

    eval_set_path = Path(args.eval_set)
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        eval_set = json.loads(eval_set_path.read_text(encoding="utf-8"))
    except OSError as exc:
        # Missing file, directory instead of file, permission problem, ...
        print(f"Error: Cannot read eval set {eval_set_path}: {exc}", file=sys.stderr)
        sys.exit(1)
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        print(f"Error: Invalid JSON in eval set {eval_set_path}: {exc}", file=sys.stderr)
        sys.exit(1)

    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    # The third element returned by parse_skill_md is not needed here.
    name, original_description, _content = parse_skill_md(skill_path)
    # A --description override, when given and non-empty, replaces the
    # description parsed from SKILL.md.
    description = args.description or original_description
    project_root = find_project_root()

    if args.verbose:
        print(f"Evaluating: {description}", file=sys.stderr)

    output = run_eval(
        eval_set=eval_set,
        skill_name=name,
        description=description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        project_root=project_root,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        model=args.model,
    )

    if args.verbose:
        summary = output["summary"]
        print(f"Results: {summary['passed']}/{summary['total']} passed", file=sys.stderr)
        for r in output["results"]:
            status = "PASS" if r["pass"] else "FAIL"
            rate_str = f"{r['triggers']}/{r['runs']}"
            # Queries are truncated to 70 characters to keep each line short.
            print(f"  [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:70]}", file=sys.stderr)

    print(json.dumps(output, indent=2))
307
308
309if __name__ == "__main__":

Callers 1

run_eval.py · File · 0.70

Calls 3

parse_skill_md · Function · 0.90
find_project_root · Function · 0.85
run_eval · Function · 0.85

Tested by

no test coverage detected