(args: argparse.Namespace)
| 59 | |
| 60 | |
| 61 | def run_bench(args: argparse.Namespace) -> int: |
| 62 | if args.batch is not None and args.batch < 1: |
| 63 | print("error: --batch must be >= 1", file=sys.stderr) |
| 64 | return 1 |
| 65 | |
| 66 | if args.paths: |
| 67 | gz_files = collect_gz_files([str(p) for p in args.paths]) |
| 68 | else: |
| 69 | corpus_paths = _read_corpus(Path(args.corpus)) |
| 70 | if not corpus_paths: |
| 71 | print(f"corpus {args.corpus} is empty", file=sys.stderr) |
| 72 | return 1 |
| 73 | gz_files = [os.path.abspath(str(p)) for p in corpus_paths] |
| 74 | |
| 75 | if not gz_files: |
| 76 | print("No .gz files found.", file=sys.stderr) |
| 77 | return 1 |
| 78 | |
| 79 | timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") |
| 80 | label = re.sub(r"[^A-Za-z0-9_.-]+", "-", args.label).strip("-") or "llm" |
| 81 | run_dir = Path(args.output or DEFAULT_RUNS_DIR / f"{timestamp}-{label}") |
| 82 | run_dir.mkdir(parents=True, exist_ok=True) |
| 83 | |
| 84 | logger.info("benchmarking %d file(s)...", len(gz_files)) |
| 85 | |
| 86 | config = ExtractorConfig( |
| 87 | model=args.model, |
| 88 | run_dir=str(run_dir), |
| 89 | repo_root=str(REPO_ROOT), |
| 90 | debug=True, |
| 91 | ) |
| 92 | extractor = make_extractor("llm", config) |
| 93 | |
| 94 | pages_by_path: dict[str, dict[str, Any]] = {} |
| 95 | failures: list[dict[str, str]] = [] |
| 96 | |
| 97 | def _on_result(gz_path: str, fe: ExtractionResult) -> None: |
| 98 | rel_path = _repo_relative(Path(fe.gz_path)) |
| 99 | stem = _path_stem(rel_path) |
| 100 | |
| 101 | extraction: dict[str, Any] = { |
| 102 | "n_chunks": fe.stats.chunks, |
| 103 | "plain_text_len": fe.stats.plain_text_len, |
| 104 | } |
| 105 | if fe.outcome == ExtractionOutcome.SUCCESS and fe.mp: |
| 106 | extraction["success"] = True |
| 107 | extraction["n_options"] = len(fe.mp.options) |
| 108 | extraction["dashless_opts"] = fe.mp.dashless_opts |
| 109 | extraction["n_aliases"] = len(fe.mp.aliases) |
| 110 | extraction["has_synopsis"] = bool(fe.mp.synopsis) |
| 111 | else: |
| 112 | extraction["success"] = False |
| 113 | extraction["n_options"] = 0 |
| 114 | error = fe.error or ( |
| 115 | "skipped" |
| 116 | if fe.outcome == ExtractionOutcome.SKIPPED |
| 117 | else "extraction failed" |
| 118 | ) |
nothing calls this directly
no test coverage detected