Assemble an ExtractionResult from the prepared file data and the per-chunk results.
(
self,
gz_path: str,
prepared: PreparedFile,
all_chunk_data: list[ChunkResult],
stats: ExtractionStats,
)
| 366 | return self._finalize(gz_path, prepared, all_chunk_data, stats) |
| 367 | |
| 368 | def _finalize( |
| 369 | self, |
| 370 | gz_path: str, |
| 371 | prepared: PreparedFile, |
| 372 | all_chunk_data: list[ChunkResult], |
| 373 | stats: ExtractionStats, |
| 374 | ) -> ExtractionResult: |
| 375 | """Assemble ExtractionResult from prepared data + chunk results.""" |
| 376 | basename = prepared.basename |
| 377 | original_lines = prepared.original_lines |
| 378 | n_chunks = prepared.n_chunks |
| 379 | numbered_text = prepared.numbered_text |
| 380 | |
| 381 | stem = self._artifact_stem(gz_path) |
| 382 | if self._run_dir and self._debug: |
| 383 | with open(self._artifact_path("markdown", stem, ".md"), "w") as f: |
| 384 | f.write(numbered_text) |
| 385 | |
| 386 | all_raw: list[dict] = [] |
| 387 | dashless_opts = False |
| 388 | all_subcommands: list[str] = [] |
| 389 | for i, cr in enumerate(all_chunk_data): |
| 390 | all_raw.extend(cr.data["options"]) |
| 391 | if cr.data.get("dashless_opts"): |
| 392 | dashless_opts = True |
| 393 | all_subcommands.extend(cr.data.get("subcommands") or []) |
| 394 | |
| 395 | if self._run_dir and self._debug: |
| 396 | chunk_suffix = "" if n_chunks == 1 else f".chunk-{i}" |
| 397 | with open( |
| 398 | self._artifact_path("prompts", stem, f"{chunk_suffix}.prompt.json"), |
| 399 | "w", |
| 400 | ) as f: |
| 401 | json.dump(cr.messages, f, indent=2) |
| 402 | with open( |
| 403 | self._artifact_path( |
| 404 | "responses", stem, f"{chunk_suffix}.response.txt" |
| 405 | ), |
| 406 | "w", |
| 407 | ) as f: |
| 408 | f.write(cr.raw_response) |
| 409 | |
| 410 | all_raw = dedup_ref_options(all_raw) |
| 411 | |
| 412 | options = [] |
| 413 | for idx, raw_opt in enumerate(all_raw): |
| 414 | try: |
| 415 | if normalize_option_fields(raw_opt) != raw_opt: |
| 416 | stats.normalized_options += 1 |
| 417 | options.append(llm_option_to_store_option(raw_opt, original_lines)) |
| 418 | except (ValueError, ValidationError) as e: |
| 419 | logger.warning( |
| 420 | "%s: skipping malformed option %d: %s\n raw: %s", |
| 421 | basename, |
| 422 | idx, |
| 423 | e, |
| 424 | json.dumps(raw_opt, default=str)[:200], |
| 425 | ) |
no test coverage detected