()
| 228 | |
| 229 | |
| 230 | def main() -> int: |
| 231 | parser = argparse.ArgumentParser() |
| 232 | parser.add_argument( |
| 233 | "--depth", default="quick", choices=list(DEPTH_MAP.keys()) |
| 234 | ) |
| 235 | parser.add_argument("--output-dir", default="eval-reports") |
| 236 | parser.add_argument( |
| 237 | "--concurrency", |
| 238 | type=int, |
| 239 | default=4, |
| 240 | help="Max concurrent LLM calls for Layer 2/3", |
| 241 | ) |
| 242 | parser.add_argument( |
| 243 | "--threshold", |
| 244 | type=float, |
| 245 | default=None, |
| 246 | help="Exit 1 if mean score below this (0-100)", |
| 247 | ) |
| 248 | parser.add_argument( |
| 249 | "--only-changed", |
| 250 | default=None, |
| 251 | help="Comma-separated plugin names to limit evaluation to", |
| 252 | ) |
| 253 | args = parser.parse_args() |
| 254 | |
| 255 | output_dir = Path(args.output_dir) |
| 256 | output_dir.mkdir(parents=True, exist_ok=True) |
| 257 | |
| 258 | plugins = discover_plugins() |
| 259 | if args.only_changed: |
| 260 | wanted = {n.strip() for n in args.only_changed.split(",") if n.strip()} |
| 261 | plugins = [p for p in plugins if p.name in wanted] |
| 262 | |
| 263 | config = EvalConfig( |
| 264 | depth=DEPTH_MAP[args.depth], |
| 265 | concurrency=args.concurrency, |
| 266 | ) |
| 267 | |
| 268 | started_at = time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime()) |
| 269 | print( |
| 270 | f"[eval_all] evaluating {len(plugins)} plugins at depth={args.depth} " |
| 271 | f"concurrency={args.concurrency}", |
| 272 | file=sys.stderr, |
| 273 | ) |
| 274 | |
| 275 | rows: list[PluginRow] = [] |
| 276 | for i, plugin_dir in enumerate(plugins, 1): |
| 277 | print( |
| 278 | f"[eval_all] ({i}/{len(plugins)}) {plugin_dir.name}…", |
| 279 | file=sys.stderr, |
| 280 | ) |
| 281 | row = evaluate_one(plugin_dir, config, output_dir) |
| 282 | rows.append(row) |
| 283 | |
| 284 | summary_md = build_summary_md(rows, args.depth, started_at) |
| 285 | (output_dir / "summary.md").write_text(summary_md) |
| 286 | (output_dir / "summary.json").write_text( |
| 287 | json.dumps([asdict(r) for r in rows], indent=2) |
no test coverage detected