MCPcopy
hub / github.com/datalab-to/marker / draw_pdf_debug_images

Method draw_pdf_debug_images

marker/processors/debug.py:62–94  ·  view source on GitHub ↗
(self, document: Document)

Source from the content-addressed store, hash-verified

60 logger.info(f"Dumped block debug data to {self.debug_data_folder}")
61
def draw_pdf_debug_images(self, document: Document):
    """Write one annotated debug PNG per page showing its line boxes.

    For every page in ``document.pages`` this:

    1. takes a copy of the page's high-resolution image,
    2. collects the bounding box and block id of each non-removed child
       whose ``block_type`` is ``BlockTypes.Line``, rescaled from the page
       polygon's size to the image's size,
    3. passes those boxes to ``self.render_on_image`` (blue, labelled with
       the block ids),
    4. passes the image through ``self.render_layout_boxes``, and
    5. saves the result as ``pdf_page_{page_id}.png`` in
       ``self.debug_folder``.

    Args:
        document: The document whose pages should be rendered.
    """
    for page in document.pages:
        # Work on a copy; presumably so the annotations do not end up on
        # the image object returned by the page -- confirm in get_image.
        png_image = page.get_image(highres=True).copy()

        line_bboxes = []
        line_ids = []
        for child in page.children:
            # Skip any blocks that have been removed
            if child.removed:
                continue

            # Only line boxes are drawn. Span boxes used to be collected
            # here as well but were never rendered, so that work is gone.
            if child.block_type != BlockTypes.Line:
                continue

            # Map the box from page coordinates to image pixel coordinates.
            bbox = child.polygon.rescale(page.polygon.size, png_image.size).bbox
            line_bboxes.append(bbox)
            line_ids.append(child.block_id)

        # The return value is not used, so render_on_image is presumably
        # drawing onto png_image in place -- confirm in its definition.
        self.render_on_image(
            line_bboxes,
            png_image,
            color="blue",
            draw_bbox=True,
            label_font_size=24,
            labels=[str(i) for i in line_ids],
        )

        png_image = self.render_layout_boxes(page, png_image)

        debug_file = os.path.join(self.debug_folder, f"pdf_page_{page.page_id}.png")
        png_image.save(debug_file)
95
96 def draw_layout_debug_images(self, document: Document, pdf_mode=False):
97 for page in document.pages:

Callers 1

__call__Method · 0.95

Calls 4

render_on_imageMethod · 0.95
render_layout_boxesMethod · 0.95
rescaleMethod · 0.80
get_imageMethod · 0.45

Tested by

no test coverage detected