MCPcopy Index your code
hub / github.com/unclecode/crawl4ai / handle_data

Method handle_data

crawl4ai/html2text/__init__.py:867–908  ·  view source on GitHub ↗
(self, data: str, entity_char: bool = False)

Source from the content-addressed store, hash-verified

865 self.outcount += 1
866
def handle_data(self, data: str, entity_char: bool = False) -> None:
    """Emit one chunk of character data through ``self.o``.

    Args:
        data: Text to emit. An empty value is ignored entirely.
        entity_char: When true, ``data`` is written without Markdown
            escaping (presumably because it came from a character or
            entity reference -- confirm against the callers).

    Side effects:
        May update ``stressed``, ``preceding_stressed``, ``style_def``,
        ``maybe_automatic_link``, ``empty_link`` and ``preceding_data``,
        and writes output via ``self.o``.
    """
    # Some HTML entities (LEFT-TO-RIGHT MARK, for instance) produce no
    # text at all; there is nothing to emit for them.
    if not data:
        return

    if self.stressed:
        # First text seen while the "stressed" flag is up: trim it and
        # hand the flag over to ``preceding_stressed``.
        data = data.strip()
        self.stressed = False
        self.preceding_stressed = True
    elif self.preceding_stressed:
        first_char = data[0]
        # The pattern is meant to match a letter or common punctuation,
        # i.e. anything that is not a bracket, whitespace or one of ".!?".
        wants_leading_space = (
            re.match(r"[^][(){}\s.!?]", first_char)
            and not hn(self.current_tag)
            and self.current_tag not in ["a", "code", "pre"]
        )
        if wants_leading_space:
            data = " " + data
        self.preceding_stressed = False

    if self.style:
        # While the style flag is set, the text is also parsed as CSS and
        # merged into the accumulated style definitions.
        parsed_css = dumb_css_parser(data)
        self.style_def.update(parsed_css)

    pending_href = self.maybe_automatic_link
    if pending_href is not None:
        is_automatic_link = (
            pending_href == data
            and self.absolute_url_matcher.match(pending_href)
            and self.use_automatic_links
        )
        if is_automatic_link:
            # Link text equals its absolute target: write the compact
            # ``<url>`` form and stop here.
            self.o("<" + data + ">")
            self.empty_link = False
            return

        # Otherwise open a regular ``[text]`` link and clear the pending
        # automatic-link candidate.
        self.o("[")
        self.maybe_automatic_link = None
        self.empty_link = False

    emit_verbatim = self.code or self.pre or entity_char
    if not emit_verbatim:
        data = escape_md_section(
            data,
            snob=self.escape_snob,
            escape_dot=self.escape_dot,
            escape_plus=self.escape_plus,
            escape_dash=self.escape_dash,
        )
    self.preceding_data = data
    self.o(data, puredata=True)
909
910 def charref(self, name: str) -> str:
911 if name[0] in ["x", "X"]:

Callers 2

handle_charrefMethod · 0.95
handle_entityrefMethod · 0.95

Calls 4

oMethod · 0.95
hnFunction · 0.85
dumb_css_parserFunction · 0.85
escape_md_sectionFunction · 0.85

Tested by

no test coverage detected