MCPcopy
hub / github.com/unclecode/crawl4ai / optwrap

Method optwrap

crawl4ai/html2text/__init__.py:947–1007  ·  view source on GitHub ↗

Wrap all paragraphs in the provided text. Parameter `text` is a `str`; the return value is a `str`.

(self, text: str)

Source from the content-addressed store, hash-verified

945 return nest_count
946
947 def optwrap(self, text: str) -> str:
948 """
949 Wrap all paragraphs in the provided text.
950
951 :type text: str
952
953 :rtype: str
954 """
955 if not self.body_width:
956 return text
957
958 result = ""
959 newlines = 0
960 # I cannot think of a better solution for now.
961 # To avoid the non-wrap behaviour for entire paras
962 # because of the presence of a link in it
963 if not self.wrap_links:
964 self.inline_links = False
965 for para in text.split("\n"):
966 if len(para) > 0:
967 if not skipwrap(
968 para, self.wrap_links, self.wrap_list_items, self.wrap_tables
969 ):
970 indent = ""
971 if para.startswith(" " + self.ul_item_mark):
972 # list item continuation: add a double indent to the
973 # new lines
974 indent = " "
975 elif para.startswith("> "):
976 # blockquote continuation: add the greater than symbol
977 # to the new lines
978 indent = "> "
979 wrapped = wrap(
980 para,
981 self.body_width,
982 break_long_words=False,
983 subsequent_indent=indent,
984 )
985 result += "\n".join(wrapped)
986 if para.endswith(" "):
987 result += " \n"
988 newlines = 1
989 elif indent:
990 result += "\n"
991 newlines = 1
992 else:
993 result += "\n\n"
994 newlines = 2
995 else:
996 # Warning for the tempted!!!
997 # Be aware that obvious replacement of this with
998 # line.isspace()
999 # DOES NOT work! Explanations are welcome.
1000 if not config.RE_SPACE.match(para):
1001 result += para + "\n"
1002 newlines = 1
1003 else:
1004 if newlines < 2:

Callers 1

handle (Method) · 0.95

Calls 1

skipwrap (Function) · 0.85

Tested by

no test coverage detected