Function skipwrap

crawl4ai/html2text/utils.py:162–196 · view source on GitHub ↗

(
    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
)

Source from the content-addressed store, hash-verified

160
161
def skipwrap(
    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
) -> bool:
    """Return True when ``para`` must be left unwrapped.

    Args:
        para: Text of the paragraph being considered for wrapping.
        wrap_links: When False, a paragraph matching ``config.RE_LINK``
            is never wrapped.
        wrap_list_items: When False, a paragraph that starts with ``-`` or
            a single ``*`` (after leading whitespace) is never wrapped.
        wrap_tables: When False, a paragraph matching ``config.RE_TABLE``
            is never wrapped.

    Returns:
        True if wrapping should be skipped for this paragraph, False if
        the paragraph may be wrapped.
    """
    # If it appears to contain a link, don't wrap.
    if not wrap_links and config.RE_LINK.search(para):
        return True

    # If the text begins with four spaces or one tab, it's a code block;
    # don't wrap.  ``startswith`` is used instead of ``para[0]`` so an
    # empty paragraph does not raise IndexError.
    if para.startswith(("    ", "\t")):
        return True

    # If the text begins with only two "--", possibly preceded by
    # whitespace, that's an emdash; so wrap.
    stripped = para.lstrip()
    if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
        return False

    # A leading "-" or single "*" is treated as a list-item marker, while a
    # leading "**" (bold) is not.  The original author noted that a
    # <br>-inside-<span> test case also depends on this check, so it is
    # kept ahead of the table and list-regex checks.
    if stripped[0:1] in ("-", "*") and stripped[0:2] != "**":
        return not wrap_list_items

    # If text contains a pipe character it is likely a table.
    if not wrap_tables and config.RE_TABLE.search(para):
        return True

    # If the text begins with a single -, *, or +, followed by a space,
    # or an integer, followed by a ., followed by a space (in either
    # case optionally preceded by whitespace), it's a list; don't wrap.
    return bool(
        config.RE_ORDERED_LIST_MATCHER.match(stripped)
        or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
    )
197
198
199	def escape_md(text: str) -> str:

optwrap · Method · 0.85

no outgoing calls

no test coverage detected

searching dependent graphs…