MCP copy
hub / github.com/docling-project/docling / _flush_buffer

Method _flush_buffer

docling/backend/html_backend.py:1556–1617  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

1554 buffer: AnnotatedTextList = AnnotatedTextList()
1555
1556 def _flush_buffer() -> None:
1557 if not buffer:
1558 return
1559 annotated_text_list: AnnotatedTextList = buffer.simplify_text_elements()
1560 parts = annotated_text_list.split_by_newline()
1561 buffer.clear()
1562
1563 if not "".join([el.text for el in annotated_text_list]):
1564 return
1565
1566 for annotated_text_list in parts:
1567 compacted_parts = self._compact_adjacent_single_char_parts(
1568 annotated_text_list
1569 )
1570 force_inline_group = (
1571 len(annotated_text_list) == 1
1572 and bool(annotated_text_list[0].code)
1573 and element.name not in {"p", "pre"}
1574 )
1575 with self._use_inline_group(
1576 annotated_text_list, doc, force=force_inline_group
1577 ) as inline_ref:
1578 for annotated_text, source_tag_ids in compacted_parts:
1579 if annotated_text.text.strip():
1580 seg_clean = HTMLDocumentBackend._clean_unicode(
1581 annotated_text.text.strip()
1582 )
1583 if annotated_text.code:
1584 prov = self._make_text_prov_for_source_tag_ids(
1585 text=seg_clean,
1586 tag=element,
1587 source_tag_ids=source_tag_ids,
1588 )
1589 docling_code2 = doc.add_code(
1590 parent=self.parents[self.level],
1591 text=seg_clean,
1592 content_layer=self.content_layer,
1593 formatting=annotated_text.formatting,
1594 hyperlink=annotated_text.hyperlink,
1595 prov=prov,
1596 )
1597 if inline_ref is None:
1598 added_refs.append(docling_code2.get_ref())
1599 else:
1600 prov = self._make_text_prov_for_source_tag_ids(
1601 text=seg_clean,
1602 tag=element,
1603 source_tag_ids=source_tag_ids,
1604 )
1605 docling_text2 = doc.add_text(
1606 parent=self.parents[self.level],
1607 label=DocItemLabel.TEXT,
1608 text=seg_clean,
1609 content_layer=self.content_layer,
1610 formatting=annotated_text.formatting,
1611 hyperlink=annotated_text.hyperlink,
1612 prov=prov,
1613 )

Callers

nothing calls this directly

Tested by

no test coverage detected