(argv: list[str] | None = None)
| 86 | |
| 87 | |
def main(argv: list[str] | None = None) -> int:
    """Run the evaluation command-line entry point.

    Parses ``argv``, configures root logging, runs an ``Evaluator`` over the
    requested trajectories and scenarios, hands the resulting report to
    ``write_reports_dir``, and prints a summary to stdout.

    Args:
        argv: Argument strings forwarded unchanged to the parser's
            ``parse_args``; may be ``None``.

    Returns:
        ``0`` once every step has completed.
    """
    options = _build_parser().parse_args(argv)

    # --verbose raises the log level from WARNING to INFO.
    if options.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    # Both pre-flight steps run before the evaluator is constructed.
    _maybe_install_judge(options.judge_model)
    _validate_scorer_default(options.scorer_default)

    evaluator = Evaluator(
        default_scorer=options.scorer_default,
        judge_model=options.judge_model,
    )
    scenario_files = list(options.scenarios)
    report = evaluator.evaluate(
        trajectories_path=options.trajectories,
        scenarios_paths=scenario_files,
    )

    # Reports are written before anything is printed.
    out_dir = write_reports_dir(report, options.reports_dir)

    print(render_summary(report))
    print(f"\nReports written: {out_dir}/<run_id>.json ({len(report.results)} files)")
    print(f"Aggregate: {out_dir}/_aggregate.json")
    return 0
| 111 | |
| 112 | |
| 113 | if __name__ == "__main__": |
no test coverage detected