MCPcopy
hub / github.com/ArtifexSoftware/pdf2docx / _identify_floating_images

Method _identify_floating_images

pdf2docx/layout/Blocks.py:370–385  ·  view source on GitHub ↗

Identify floating image lines and convert to ImageBlock.

(self, float_image_ignorable_gap:float)

Source from the content-addressed store, hash-verified

368 # internal methods
369 # ----------------------------------------------------------------------------------
def _identify_floating_images(self, float_image_ignorable_gap:float):
    '''Promote image lines found in multi-line connectivity groups to floating image blocks.

    Lines are grouped with ``self.group_by_connectivity``, passing the negated
    gap as both ``dx`` and ``dy``. Groups holding a single line are skipped. In
    the remaining groups, every line with a non-empty ``image_spans`` is turned
    into an ``ImageBlock`` flagged as floating and appended to
    ``self._floating_image_blocks``; the line's bbox is then reset to
    ``(0,0,0,0)``.

    Args:
        float_image_ignorable_gap (float): Gap value, negated before it is
            handed to the grouping call.

    Returns:
        The instance itself, so calls can be chained.
    '''
    # the same negated gap is applied in both directions
    margin = -float_image_ignorable_gap
    clusters = self.group_by_connectivity(dx=margin, dy=margin)

    for cluster in clusters:
        # a group with a single line has nothing connected to it
        if len(cluster) <= 1:
            continue

        for member in cluster:
            # only lines carrying image spans are candidates
            if not member.image_spans:
                continue

            # NOTE(review): the check above uses `image_spans` while the block is
            # built from `spans[0]` -- presumably an image line holds its image
            # as the first span; confirm against the Line definition.
            image_block = ImageBlock().from_image(member.spans[0])
            image_block.set_float_image_block()
            self._floating_image_blocks.append(image_block)

            # an empty bbox takes the original image line out of the flow layout
            member.update_bbox((0,0,0,0))

    return self
386
387 def _remove_overlapped_lines(self, line_overlap_threshold:float):
388 '''Delete overlapped lines.

Callers 1

clean_up — Method · 0.80

Calls 6

ImageBlock — Class · 0.90
group_by_connectivity — Method · 0.80
from_image — Method · 0.80
set_float_image_block — Method · 0.80
append — Method · 0.45
update_bbox — Method · 0.45

Tested by

no test coverage detected