(
label: str, model: str, run_dir: str, files: list[str], jobs: int
)
| 88 | |
| 89 | |
def run_extractor(
    label: str, model: str, run_dir: str, files: list[str], jobs: int
) -> dict[str, ExtractionResult]:
    """Run the "llm" extractor over ``files`` and gather the reported results.

    Builds an ``ExtractorConfig`` from ``model`` and ``run_dir``, creates the
    extractor through ``make_extractor("llm", ...)`` and passes it to
    ``run`` together with a progress callback. A banner is printed before
    the run and the elapsed time (monotonic clock) afterwards.

    Args:
        label: Tag shown in the banner and in every progress line.
        model: Model identifier forwarded to ``ExtractorConfig``.
        run_dir: Run directory forwarded to ``ExtractorConfig``.
        files: Paths handed to ``run``.
        jobs: Forwarded to ``run`` as ``jobs=``.

    Returns:
        Mapping from each path reported through the callback to its
        ``ExtractionResult``.
    """
    extractor = make_extractor(
        "llm", ExtractorConfig(model=model, run_dir=run_dir)
    )
    collected: dict[str, ExtractionResult] = {}
    n_files = len(files)
    done = 0

    def on_result(path: str, entry: ExtractionResult) -> None:
        # Presumably invoked by run() once per processed file -- confirm
        # against run()'s contract.
        nonlocal done
        done += 1
        collected[path] = entry
        outcome_text = entry.outcome.value
        # entry.mp is only touched for successful extractions.
        if entry.outcome == ExtractionOutcome.SUCCESS:
            option_count = len(entry.mp.options)
        else:
            option_count = 0
        print(
            f" [{label} {done}/{n_files}] {os.path.basename(path)} "
            f"{outcome_text} opts={option_count}",
            flush=True,
        )

    print(
        f"\n=== {label} ({model}) on {n_files} files, jobs={jobs} ===",
        flush=True,
    )
    started = time.monotonic()
    run(extractor, files, jobs=jobs, on_result=on_result)
    elapsed = time.monotonic() - started
    print(f" {label} done in {elapsed:.0f}s", flush=True)
    return collected
| 117 | |
| 118 | |
| 119 | def main() -> int: |
no test coverage detected