MCPcopy
hub / github.com/IPADS-SAI/MobiAgent / run

Method run

utils/ocr_engine.py:273–407  ·  view source on GitHub ↗

运行OCR识别。Args: img — 输入图像,可以是文件路径、PIL图像或numpy数组。Returns: OCR识别结果。

(self, img: Any)

Source from the content-addressed store, hash-verified

271 return img
272
273 def run(self, img: Any) -> OCRResult:
274 """
275 运行OCR识别
276
277 Args:
278 img: 输入图像,可以是文件路径、PIL图像或numpy数组
279
280 Returns:
281 OCR识别结果
282 """
283 # 1) PaddleOCR路径
284 if self._paddle is not None:
285 try:
286 logger.debug("尝试使用PaddleOCR识别")
287 processed_img = self._resize_image_if_needed(img, max_side=4000)
288
289 # 准备PaddleOCR的输入
290 paddle_input = None
291 if isinstance(processed_img, str):
292 paddle_input = processed_img
293 else:
294 try:
295 import numpy as np
296 if isinstance(processed_img, np.ndarray):
297 paddle_input = processed_img
298 else:
299 pil = self._to_pil(processed_img)
300 paddle_input = np.array(pil)
301 except ImportError:
302 pil = self._to_pil(processed_img)
303 # 如果无法转换为numpy,保存临时文件
304 import tempfile
305 with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
306 pil.save(tmp.name)
307 paddle_input = tmp.name
308
309 # 尝试新的predict API
310 try:
311 results = self._paddle.predict(paddle_input)
312 if results and len(results) > 0:
313 result_data = results[0]
314 if isinstance(result_data, dict):
315 texts = result_data.get("rec_texts", [])
316 scores = result_data.get("rec_scores", [])
317 bboxes = result_data.get("det_polygons", [])
318
319 words: List[OCRWord] = []
320 for i, (text, score) in enumerate(zip(texts, scores)):
321 if i < len(bboxes):
322 box = bboxes[i]
323 x1 = int(min(p[0] for p in box))
324 y1 = int(min(p[1] for p in box))
325 x2 = int(max(p[0] for p in box))
326 y2 = int(max(p[1] for p in box))
327 else:
328 x1, y1, x2, y2 = 0, 0, 100, 20
329 words.append(OCRWord(text=text, bbox=(x1, y1, x2, y2), conf=float(score)))
330 return OCRResult(words=words)

Callers 5

ocr_image · Function · 0.95
main · Function · 0.45
server.py · File · 0.45
server.py · File · 0.45

Calls 8

_to_pil · Method · 0.95
OCRWord · Class · 0.85
OCRResult · Class · 0.85
debug · Method · 0.80
error · Method · 0.80
warning · Method · 0.80

Tested by

no test coverage detected