Unified dispatcher for all execution modes:

- ``batch_size`` set → batch mode (requires ``BatchExtractor``).
- ``jobs > 1`` → parallel mode via thread pool.
- otherwise → sequential.
(
extractor: Extractor,
gz_files: list[str],
*,
manifest: BatchManifestWriter | None = None,
batch_size: int | None = None,
jobs: int = 1,
on_start: Callable[[str], None] | None = None,
on_result: Callable[[str, ExtractionResult], None] | None = None,
)
| 638 | |
| 639 | |
def run(
    extractor: Extractor,
    gz_files: list[str],
    *,
    manifest: BatchManifestWriter | None = None,
    batch_size: int | None = None,
    jobs: int = 1,
    on_start: Callable[[str], None] | None = None,
    on_result: Callable[[str, ExtractionResult], None] | None = None,
) -> BatchResult:
    """Select an execution mode from the arguments and delegate to it.

    The mode is chosen in this order:

    1. ``batch_size`` is not ``None`` → ``run_batch`` (the extractor must be
       a ``BatchExtractor``); ``jobs`` is forwarded to it.
    2. ``jobs > 1`` → ``run_parallel`` (parallel mode via thread pool).
    3. anything else → ``run_sequential``.

    ``manifest`` is only forwarded in batch mode; when it is omitted there,
    a ``_NullBatchManifestWriter`` is substituted. ``on_start`` and
    ``on_result`` are forwarded unchanged to whichever runner is selected.

    Raises:
        ValueError: ``batch_size`` was given but is smaller than 1.
        TypeError: ``batch_size`` was given but ``extractor`` is not a
            ``BatchExtractor``.
    """
    # Both callbacks are handed to every runner verbatim, so bundle them once.
    callbacks = {"on_start": on_start, "on_result": on_result}

    # Non-batch modes first: the choice depends on the worker count only.
    if batch_size is None:
        if jobs > 1:
            return run_parallel(extractor, gz_files, jobs, **callbacks)
        return run_sequential(extractor, gz_files, **callbacks)

    # Batch mode: validate the request before handing over to run_batch.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 (got {batch_size})")
    if not isinstance(extractor, BatchExtractor):
        raise TypeError(
            f"batch mode requires a BatchExtractor (got {type(extractor).__name__})"
        )

    return run_batch(
        extractor,
        gz_files,
        batch_size=batch_size,
        jobs=jobs,
        **callbacks,
        manifest=_NullBatchManifestWriter() if manifest is None else manifest,
    )
| 688 | |
| 689 | |
| 690 | def run_collected( |