从图像中提取文字。Args: image_path: 图像文件路径; enable_hybrid: 是否启用混合识别(Paddle + Tesseract)。Returns: Tuple[str, Optional[str]]: (主识别结果, 备用识别结果)。
(self, image_path: str, enable_hybrid: bool = True)
| 195 | return False |
| 196 | |
| 197 | def extract_text_from_image(self, image_path: str, enable_hybrid: bool = True) -> Tuple[str, Optional[str]]: |
| 198 | """ |
| 199 | 从图像中提取文字 |
| 200 | |
| 201 | Args: |
| 202 | image_path: 图像文件路径 |
| 203 | enable_hybrid: 是否启用混合识别(Paddle + Tesseract) |
| 204 | |
| 205 | Returns: |
| 206 | Tuple[str, Optional[str]]: (主识别结果, 备用识别结果) |
| 207 | """ |
| 208 | if not self.is_available(): |
| 209 | logger.warning("OCR引擎不可用") |
| 210 | return "", None |
| 211 | |
| 212 | if not os.path.exists(image_path): |
| 213 | logger.error(f"图像文件不存在: {image_path}") |
| 214 | return "", None |
| 215 | |
| 216 | try: |
| 217 | # 验证图像文件 |
| 218 | with Image.open(image_path) as img: |
| 219 | img.verify() |
| 220 | except Exception as e: |
| 221 | logger.error(f"打开图片失败: {image_path}: {e}") |
| 222 | return "", None |
| 223 | |
| 224 | primary_text = "" |
| 225 | secondary_text = None |
| 226 | |
| 227 | # 使用主引擎识别 |
| 228 | if self._engine: |
| 229 | try: |
| 230 | result = self._engine.run(image_path) |
| 231 | primary_text = result.get_text() if result else "" |
| 232 | logger.debug(f"主引擎识别结果: {len(primary_text)} 字符") |
| 233 | except Exception as e: |
| 234 | logger.error(f"主引擎识别失败: {e}") |
| 235 | |
| 236 | # 混合识别:使用备用引擎 |
| 237 | if enable_hybrid and self._engine_paddle and self._engine_tess: |
| 238 | # 选择备用引擎,确保它是可用的 |
| 239 | if self._engine == self._engine_paddle: |
| 240 | backup_engine = self._engine_tess |
| 241 | backup_name = "Tesseract" |
| 242 | else: |
| 243 | backup_engine = self._engine_paddle |
| 244 | backup_name = "PaddleOCR" |
| 245 | |
| 246 | # 检查备用引擎是否真的可用 |
| 247 | backup_available = False |
| 248 | try: |
| 249 | if backup_engine: |
| 250 | # 简单测试引擎是否能工作 |
| 251 | backup_available = (backup_engine._paddle is not None) or (hasattr(backup_engine, '_has_tesseract') and backup_engine._has_tesseract) |
| 252 | except: |
| 253 | backup_available = False |
| 254 |
no test coverage detected