Runs segmentation, text extraction, and XML merge (see README pipeline).
| 78 | |
| 79 | # ======================== pipeline ======================== |
| 80 | class Pipeline: |
| 81 | """Runs segmentation, text extraction, and XML merge (see README pipeline).""" |
| 82 | |
def __init__(self, config: dict = None):
    """Store the configuration and reset every lazily built component.

    Args:
        config: Settings mapping. Any falsy value (``None`` or an empty
            mapping) is replaced by the result of ``load_config()``.
    """
    if not config:
        config = load_config()
    self.config = config

    # Component slots start empty; the matching properties fill them
    # the first time they are accessed.
    self._text_restorer = self._sam3_extractor = None
    self._icon_processor = self._shape_processor = None
    self._xml_merger = self._metric_evaluator = None
    self._refinement_processor = None
| 92 | |
@property
def text_restorer(self):
    """Return the cached ``TextRestorer``, building it on first access.

    The OCR engine name is read from ``config["ocr"]["engine"]`` and
    defaults to ``"tesseract"``; the formula engine is always ``"none"``.
    While the module-level ``TextRestorer`` name is ``None`` nothing is
    built and the property yields ``None``.
    """
    if self._text_restorer is not None:
        return self._text_restorer
    if TextRestorer is None:
        # Nothing to instantiate; the slot stays empty.
        return None

    ocr_section = self.config.get("ocr") or {}
    engine_name = ocr_section.get("engine", "tesseract")
    self._text_restorer = TextRestorer(
        formula_engine="none",
        ocr_engine=engine_name,
    )
    return self._text_restorer
| 103 | |
@property
def sam3_extractor(self) -> Sam3InfoExtractor:
    """Return the shared ``Sam3InfoExtractor``, creating it on first use."""
    extractor = self._sam3_extractor
    if extractor is not None:
        return extractor
    extractor = self._sam3_extractor = Sam3InfoExtractor()
    return extractor
| 109 | |
@property
def icon_processor(self) -> IconPictureProcessor:
    """Return the shared ``IconPictureProcessor``, creating it on first use.

    The processor receives ``config["rmbg"]["model_path"]`` as its
    ``rmbg_model_path``; a missing section or key yields ``None``.
    """
    if self._icon_processor is not None:
        return self._icon_processor

    rmbg_section = self.config.get("rmbg") or {}
    self._icon_processor = IconPictureProcessor(
        rmbg_model_path=rmbg_section.get("model_path"),
    )
    return self._icon_processor
| 117 | |
@property
def shape_processor(self) -> BasicShapeProcessor:
    """Return the shared ``BasicShapeProcessor``, creating it on first use."""
    processor = self._shape_processor
    if processor is not None:
        return processor
    processor = self._shape_processor = BasicShapeProcessor()
    return processor
| 123 | |
@property
def xml_merger(self) -> XMLMerger:
    """Return the shared ``XMLMerger``, creating it on first use."""
    merger = self._xml_merger
    if merger is not None:
        return merger
    merger = self._xml_merger = XMLMerger()
    return merger
| 129 | |
@property
def metric_evaluator(self) -> MetricEvaluator:
    """Return the shared ``MetricEvaluator``, creating it on first use."""
    evaluator = self._metric_evaluator
    if evaluator is not None:
        return evaluator
    evaluator = self._metric_evaluator = MetricEvaluator()
    return evaluator
| 135 | |
| 136 | @property |
| 137 | def refinement_processor(self) -> RefinementProcessor: |