()
| 13 | |
| 14 | |
def main() -> int:
    """Parse every example input next to this script and save the results.

    Inputs are the PNG/JPG/JPEG/PDF files located directly inside the
    ``source`` directory that sits beside this file. Each one is passed to
    ``GlmOcr.parse`` and the returned result is saved into the sibling
    ``result`` directory, which is created if needed.

    PDF inputs are skipped (with a console message) when none of the poppler
    command-line tools can be found on ``PATH``. A failure on one input is
    reported and does not stop the remaining inputs from being processed.

    Returns:
        Always ``0``; per-file failures are only printed.

    Raises:
        RuntimeError: If the ``source`` directory is missing, or contains no
            supported input files.
    """
    here = Path(__file__).resolve().parent
    source_dir = here / "source"
    output_dir = here / "result"
    output_dir.mkdir(parents=True, exist_ok=True)

    # is_dir() rather than exists(): a stray *file* named "source" must fail
    # with the same clear error instead of blowing up inside iterdir().
    if not source_dir.is_dir():
        raise RuntimeError(f"Missing examples source dir: {source_dir}")

    # Match extensions case-insensitively so "SCAN.PDF" or "photo.JPG" are
    # picked up too; this agrees with the suffix.lower() checks used below.
    supported_suffixes = {".png", ".jpg", ".jpeg", ".pdf"}
    inputs = sorted(
        p
        for p in source_dir.iterdir()
        if p.is_file() and p.suffix.lower() in supported_suffixes
    )
    if not inputs:
        raise RuntimeError(f"No input files found under: {source_dir}")

    print(f"Found {len(inputs)} inputs under {source_dir}")
    print(f"Writing results to {output_dir}")

    # NOTE(review): finding any one of these tools is treated as "poppler is
    # installed" -- confirm this matches what the PDF backend actually needs.
    poppler_ok = any(
        shutil.which(cmd) is not None for cmd in ("pdfinfo", "pdftoppm", "pdftocairo")
    )
    if not poppler_ok and any(p.suffix.lower() == ".pdf" for p in inputs):
        print(
            "Poppler not found (pdfinfo/pdftoppm/pdftocairo). "
            "PDF inputs will be skipped. On macOS: brew install poppler"
        )

    with GlmOcr() as parser:
        for p in inputs:
            print(f"\n=== Parsing: {p.name} ===")
            if p.suffix.lower() == ".pdf" and not poppler_ok:
                print("Skipping PDF (missing poppler)")
                continue

            # Deliberately broad: this is the top-level boundary of an example
            # runner, and one bad input should not abort the whole batch.
            try:
                result = parser.parse(str(p))
                result.save(output_dir=output_dir)
            except Exception as e:
                print(f"Failed: {p.name}: {e}")
                continue

    print("\nAll done.")
    return 0
| 63 | |
| 64 | |
| 65 | if __name__ == "__main__": |
no test coverage detected