(self, tables: List[TableResult], table_data: list)
| 425 | table.cells = new_cells |
| 426 | |
def assign_text_to_cells(self, tables: List[TableResult], table_data: list):
    """Attach extracted text lines to the table cells they overlap.

    ``tables`` and ``table_data`` are walked in lockstep, one entry per
    table. Entries whose ``"ocr_block"`` value is truthy are skipped
    entirely. For every remaining entry, each item of
    ``"table_text_lines"`` is assigned to the cell holding the largest
    value in that line's row of the matrix returned by
    ``matrix_intersection_area``; a line whose row sums to zero is
    dropped.

    The cells are modified in place: every cell that received at least
    one line has its ``text_lines`` attribute replaced by the list of
    lines assigned to it. Cells that received nothing are left as they
    were. Nothing is returned.
    """
    for result, page_data in zip(tables, table_data):
        if page_data["ocr_block"]:
            continue

        lines = page_data["table_text_lines"]
        cells: List[SuryaTableCell] = result.cells

        # Row i of the matrix corresponds to text line i, column j to cell j.
        overlap = matrix_intersection_area(
            [line["bbox"] for line in lines],
            [cell.bbox for cell in cells],
        )

        lines_by_cell = defaultdict(list)
        for line_idx, line in enumerate(lines):
            row = overlap[line_idx]
            if row.sum() != 0:
                lines_by_cell[row.argmax()].append(line)

        for cell_idx, assigned in lines_by_cell.items():
            # TODO: see if the text needs to be sorted (based on rotation)
            assert all("text" in t for t in assigned), "All text lines must have text"
            assert all("bbox" in t for t in assigned), "All text lines must have a bbox"
            cells[cell_idx].text_lines = assigned
| 456 | |
| 457 | def assign_pdftext_lines(self, extract_blocks: list, filepath: str): |
| 458 | table_inputs = [] |
no test coverage detected