运行OCR识别 Args: img: 输入图像,可以是文件路径、PIL图像或numpy数组 Returns: OCR识别结果
(self, img: Any)
| 271 | return img |
| 272 | |
| 273 | def run(self, img: Any) -> OCRResult: |
| 274 | """ |
| 275 | 运行OCR识别 |
| 276 | |
| 277 | Args: |
| 278 | img: 输入图像,可以是文件路径、PIL图像或numpy数组 |
| 279 | |
| 280 | Returns: |
| 281 | OCR识别结果 |
| 282 | """ |
| 283 | # 1) PaddleOCR路径 |
| 284 | if self._paddle is not None: |
| 285 | try: |
| 286 | logger.debug("尝试使用PaddleOCR识别") |
| 287 | processed_img = self._resize_image_if_needed(img, max_side=4000) |
| 288 | |
| 289 | # 准备PaddleOCR的输入 |
| 290 | paddle_input = None |
| 291 | if isinstance(processed_img, str): |
| 292 | paddle_input = processed_img |
| 293 | else: |
| 294 | try: |
| 295 | import numpy as np |
| 296 | if isinstance(processed_img, np.ndarray): |
| 297 | paddle_input = processed_img |
| 298 | else: |
| 299 | pil = self._to_pil(processed_img) |
| 300 | paddle_input = np.array(pil) |
| 301 | except ImportError: |
| 302 | pil = self._to_pil(processed_img) |
| 303 | # 如果无法转换为numpy,保存临时文件 |
| 304 | import tempfile |
| 305 | with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp: |
| 306 | pil.save(tmp.name) |
| 307 | paddle_input = tmp.name |
| 308 | |
| 309 | # 尝试新的predict API |
| 310 | try: |
| 311 | results = self._paddle.predict(paddle_input) |
| 312 | if results and len(results) > 0: |
| 313 | result_data = results[0] |
| 314 | if isinstance(result_data, dict): |
| 315 | texts = result_data.get("rec_texts", []) |
| 316 | scores = result_data.get("rec_scores", []) |
| 317 | bboxes = result_data.get("det_polygons", []) |
| 318 | |
| 319 | words: List[OCRWord] = [] |
| 320 | for i, (text, score) in enumerate(zip(texts, scores)): |
| 321 | if i < len(bboxes): |
| 322 | box = bboxes[i] |
| 323 | x1 = int(min(p[0] for p in box)) |
| 324 | y1 = int(min(p[1] for p in box)) |
| 325 | x2 = int(max(p[0] for p in box)) |
| 326 | y2 = int(max(p[1] for p in box)) |
| 327 | else: |
| 328 | x1, y1, x2, y2 = 0, 0, 100, 20 |
| 329 | words.append(OCRWord(text=text, bbox=(x1, y1, x2, y2), conf=float(score))) |
| 330 | return OCRResult(words=words) |
no test coverage detected