MCPcopy
hub / github.com/datalab-to/marker / __call__

Method __call__

marker/processors/equation.py:56–95  ·  view source on GitHub ↗
(self, document: Document)

Source from the content-addressed store, hash-verified

54 return 6
55
def __call__(self, document: Document):
    """Predict LaTeX for the document's equation blocks and store it on them.

    For each page, the page image is fetched (requested with
    ``highres=True``) and the polygon of every block matching
    ``self.block_types`` is rescaled from page coordinates to image
    coordinates. All pages are then sent to ``self.get_latex_batched`` in
    one call, and each prediction is passed through ``self.fix_latex``
    before being assigned to the matching block's ``html`` attribute.

    Args:
        document: The document whose pages are scanned. Its blocks are
            modified in place.

    Returns:
        None. If no page contains a matching block, the method returns
        before ``get_latex_batched`` is called.

    Raises:
        AssertionError: If a page's prediction count differs from the
            number of equation blocks collected for that page.
    """
    images = []
    boxes_by_page = []
    ids_by_page = []

    for page in document.pages:
        image = page.get_image(highres=True)
        page_dims = (page.polygon.width, page.polygon.height)
        image_dims = image.size

        # One pass over the blocks yields (bbox, id) pairs, so the page's
        # box list and id list stay index-aligned.
        located = [
            (blk.polygon.rescale(page_dims, image_dims).bbox, blk.id)
            for blk in page.contained_blocks(document, self.block_types)
        ]

        images.append(image)
        boxes_by_page.append([bbox for bbox, _ in located])
        ids_by_page.append([blk_id for _, blk_id in located])

    # Every per-page id list is empty: there is nothing to predict.
    if not any(ids_by_page):
        return

    latex_by_page = self.get_latex_batched(images, boxes_by_page)
    for page_latex, page_ids in zip(latex_by_page, ids_by_page):
        assert len(page_latex) == len(page_ids), (
            "Every equation block should have a corresponding prediction"
        )
        for latex, blk_id in zip(page_latex, page_ids):
            target = document.get_block(blk_id)
            target.html = self.fix_latex(latex)
96
97 def fix_latex(self, math_html: str):
98 math_html = math_html.strip()

Callers

nothing calls this directly

Calls 6

get_latex_batchedMethod · 0.95
fix_latexMethod · 0.95
rescaleMethod · 0.80
get_imageMethod · 0.45
contained_blocksMethod · 0.45
get_blockMethod · 0.45

Tested by

no test coverage detected