Delete overlapped lines. NOTE: Don't run this method until floating images are excluded.
(self, line_overlap_threshold:float)
| 385 | return self |
| 386 | |
def _remove_overlapped_lines(self, line_overlap_threshold:float):
    '''Discard every line that overlaps a larger line.

    Lines are partitioned by ``self.group``, using as the pairing test the
    result of ``a.get_main_bbox(b, threshold=line_overlap_threshold)``.
    In each resulting group holding more than one line, the line with the
    largest ``bbox.get_area()`` is kept (on a tie, the one appearing last
    in the group wins, since the sort is stable). Every other line is
    reported through ``logging.warning`` and ``update_bbox((0,0,0,0))``
    is called on it.

    NOTE: Don't run this method until floating images are excluded.

    Args:
        line_overlap_threshold (float): forwarded to ``get_main_bbox`` as
            its ``threshold`` keyword.

    Returns:
        This instance, to allow call chaining.
    '''
    def overlapped(one, other):
        # pairing predicate handed to self.group
        return one.get_main_bbox(other, threshold=line_overlap_threshold)

    for members in self.group(overlapped):
        # a lone line has nothing to compete with
        if len(members) <= 1:
            continue

        # ascending by area: the final entry is the largest and survives
        by_area = list(members)
        by_area.sort(key=lambda item: item.bbox.get_area())
        for dropped in by_area[:-1]:
            logging.warning('Ignore Line "%s" due to overlap', dropped.text)
            dropped.update_bbox((0,0,0,0))

    return self
| 404 | |
| 405 | |
| 406 |
no test coverage detected