MCPcopy Index your code
hub / github.com/unclecode/crawl4ai / o

Method o

crawl4ai/html2text/__init__.py:755–865  ·  view source on GitHub ↗

Deal with indentation and whitespace

(
        self, data: str, puredata: bool = False, force: Union[bool, str] = False
    )

Source from the content-addressed store, hash-verified

753 self.br_toggle = " "
754
755 def o(
756 self, data: str, puredata: bool = False, force: Union[bool, str] = False
757 ) -> None:
758 """
759 Deal with indentation and whitespace
760 """
761 if self.abbr_data is not None:
762 self.abbr_data += data
763
764 if not self.quiet:
765 if self.google_doc:
766 # prevent white space immediately after 'begin emphasis'
767 # marks ('**' and '_')
768 lstripped_data = data.lstrip()
769 if self.drop_white_space and not (self.pre or self.code):
770 data = lstripped_data
771 if lstripped_data != "":
772 self.drop_white_space = 0
773
774 if puredata and not self.pre:
775 # This is a very dangerous call ... it could mess up
776 # all handling of &nbsp; when not handled properly
777 # (see entityref)
778 data = re.sub(r"\s+", r" ", data)
779 if data and data[0] == " ":
780 self.space = True
781 data = data[1:]
782 if not data and not force:
783 return
784
785 if self.startpre:
786 # self.out(" :") #TODO: not output when already one there
787 if not data.startswith("\n") and not data.startswith("\r\n"):
788 # <pre>stuff...
789 data = "\n" + data
790 if self.mark_code:
791 self.out("\n[code]")
792 self.p_p = 0
793
794 bq = ">" * self.blockquote
795 if not (force and data and data[0] == ">") and self.blockquote:
796 bq += " "
797
798 if self.pre:
799 if not self.list:
800 bq += " "
801 # else: list content is already partially indented
802 bq += " " * len(self.list)
803 data = data.replace("\n", "\n" + bq)
804
805 if self.startpre:
806 self.startpre = False
807 if self.list:
808 # use existing initial indentation
809 data = data.lstrip("\n")
810
811 if self.start:
812 self.space = False

Callers 6

finish Method · 0.95
handle_emphasis Method · 0.95
handle_tag Method · 0.95
link_url Method · 0.95
handle_data Method · 0.95
handle_tag Method · 0.80

Calls

no outgoing calls

Tested by

no test coverage detected