(self, document: Document)
| 54 | return 6 |
| 55 | |
def __call__(self, document: Document):
    """Fill in ``html`` for every block of ``self.block_types`` in *document*.

    For each page, the high-resolution page image is fetched and the bounding
    box of every matching block is rescaled from page coordinates to image
    coordinates. All pages are then passed to ``self.get_latex_batched`` in a
    single call, and each returned prediction is run through
    ``self.fix_latex`` and stored on its block's ``html`` attribute.

    Returns ``None``. If no page contains a matching block, the method
    returns early without calling ``self.get_latex_batched``.
    """
    images = []
    boxes_per_page = []
    ids_per_page = []

    for page in document.pages:
        image = page.get_image(highres=True)
        source_size = (page.polygon.width, page.polygon.height)
        target_size = image.size

        boxes = []
        ids = []
        for block in page.contained_blocks(document, self.block_types):
            # Boxes must be expressed in the pixel space of the page image.
            scaled = block.polygon.rescale(source_size, target_size)
            boxes.append(scaled.bbox)
            ids.append(block.id)

        # Pages without matching blocks still contribute (empty) entries so
        # that the three lists stay aligned page-by-page.
        images.append(image)
        boxes_per_page.append(boxes)
        ids_per_page.append(ids)

    # Nothing to recognise anywhere in the document.
    if not any(ids_per_page):
        return

    predictions = self.get_latex_batched(images, boxes_per_page)
    for page_predictions, page_ids in zip(predictions, ids_per_page):
        assert len(page_predictions) == len(page_ids), (
            "Every equation block should have a corresponding prediction"
        )
        for prediction, block_id in zip(page_predictions, page_ids):
            # Look the block up first, then clean the prediction, matching
            # the original order of calls.
            target = document.get_block(block_id)
            target.html = self.fix_latex(prediction)
| 96 | |
| 97 | def fix_latex(self, math_html: str): |
| 98 | math_html = math_html.strip() |
nothing calls this directly
no test coverage detected