MCPcopy Index your code
hub / github.com/idank/explainshell / run_bench

Function run_bench

tests/evals/llm/llm_eval.py:61–209  ·  view source on GitHub ↗
(args: argparse.Namespace)

Source from the content-addressed store, hash-verified

59
60
61def run_bench(args: argparse.Namespace) -> int:
62 if args.batch is not None and args.batch < 1:
63 print("error: --batch must be >= 1", file=sys.stderr)
64 return 1
65
66 if args.paths:
67 gz_files = collect_gz_files([str(p) for p in args.paths])
68 else:
69 corpus_paths = _read_corpus(Path(args.corpus))
70 if not corpus_paths:
71 print(f"corpus {args.corpus} is empty", file=sys.stderr)
72 return 1
73 gz_files = [os.path.abspath(str(p)) for p in corpus_paths]
74
75 if not gz_files:
76 print("No .gz files found.", file=sys.stderr)
77 return 1
78
79 timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
80 label = re.sub(r"[^A-Za-z0-9_.-]+", "-", args.label).strip("-") or "llm"
81 run_dir = Path(args.output or DEFAULT_RUNS_DIR / f"{timestamp}-{label}")
82 run_dir.mkdir(parents=True, exist_ok=True)
83
84 logger.info("benchmarking %d file(s)...", len(gz_files))
85
86 config = ExtractorConfig(
87 model=args.model,
88 run_dir=str(run_dir),
89 repo_root=str(REPO_ROOT),
90 debug=True,
91 )
92 extractor = make_extractor("llm", config)
93
94 pages_by_path: dict[str, dict[str, Any]] = {}
95 failures: list[dict[str, str]] = []
96
97 def _on_result(gz_path: str, fe: ExtractionResult) -> None:
98 rel_path = _repo_relative(Path(fe.gz_path))
99 stem = _path_stem(rel_path)
100
101 extraction: dict[str, Any] = {
102 "n_chunks": fe.stats.chunks,
103 "plain_text_len": fe.stats.plain_text_len,
104 }
105 if fe.outcome == ExtractionOutcome.SUCCESS and fe.mp:
106 extraction["success"] = True
107 extraction["n_options"] = len(fe.mp.options)
108 extraction["dashless_opts"] = fe.mp.dashless_opts
109 extraction["n_aliases"] = len(fe.mp.aliases)
110 extraction["has_synopsis"] = bool(fe.mp.synopsis)
111 else:
112 extraction["success"] = False
113 extraction["n_options"] = 0
114 error = fe.error or (
115 "skipped"
116 if fe.outcome == ExtractionOutcome.SKIPPED
117 else "extraction failed"
118 )

Callers

nothing calls this directly

Calls 10

collect_gz_files · Function · 0.90
_read_corpus · Function · 0.90
ExtractorConfig · Class · 0.90
make_extractor · Function · 0.90
_git_metadata · Function · 0.90
_repo_relative · Function · 0.90
_write_json · Function · 0.90
round · Function · 0.85
_print_summary · Function · 0.85

Tested by

no test coverage detected