Function _build_parser

src/evaluation/cli.py:15–66 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

13
14
15	def _build_parser() -> argparse.ArgumentParser:
16	p = argparse.ArgumentParser(
17	prog="evaluate",
18	description=(
19	"Score saved agent trajectories against scenario files and "
20	"emit a JSON report."
21	),
22	)
23	p.add_argument(
24	"--trajectories",
25	type=Path,
26	required=True,
27	help="Directory of {run_id}.json trajectory files (or a single file).",
28	)
29	p.add_argument(
30	"--scenarios",
31	type=Path,
32	nargs="+",
33	required=True,
34	help="One or more scenario JSON / JSONL files.",
35	)
36	p.add_argument(
37	"--reports-dir",
38	type=Path,
39	default=Path("reports"),
40	help=(
41	"Directory to write per-run JSON reports (one file per run, "
42	"named '<run_id>.json'), plus '_aggregate.json' for the rollup. "
43	"Default: reports/."
44	),
45	)
46	p.add_argument(
47	"--scorer-default",
48	dest="scorer_default",
49	default="llm_judge",
50	help="Scorer name when scenario.scoring_method is unset. "
51	"Default: llm_judge.",
52	)
53	p.add_argument(
54	"--judge-model",
55	default=None,
56	help="Model id for the LLM-As-Judge scorer (e.g. "
57	"litellm_proxy/anthropic/claude-opus-4-5). "
58	"Required when any scenario routes to llm_judge.",
59	)
60	p.add_argument(
61	"-v",
62	"--verbose",
63	action="store_true",
64	help="Enable INFO-level logging.",
65	)
66	return p
67
68
69	def _maybe_install_judge(judge_model: str \| None) -> None:

mainFunction · 0.70

no outgoing calls

no test coverage detected