MCPcopy
hub / github.com/unclecode/crawl4ai / skipwrap

Function skipwrap

crawl4ai/html2text/utils.py:162–196  ·  view source on GitHub ↗
(
    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
)

Source from the content-addressed store, hash-verified

160
161
def skipwrap(
    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
) -> bool:
    """Return True when *para* should be emitted as-is instead of wrapped.

    Args:
        para: The paragraph text being considered for wrapping.
        wrap_links: When False, a paragraph matching ``config.RE_LINK``
            is never wrapped.
        wrap_list_items: When False, a paragraph whose first
            non-whitespace character is ``-`` or ``*`` (but which does
            not start with ``**``) is never wrapped.
        wrap_tables: When False, a paragraph matching ``config.RE_TABLE``
            is never wrapped.

    Returns:
        True if wrapping must be skipped for this paragraph, False if the
        paragraph may be wrapped.
    """
    # If it appears to contain a link, don't wrap.
    if not wrap_links and config.RE_LINK.search(para):
        return True

    # If the text begins with four spaces or one tab, it's a code block;
    # don't wrap. startswith() is used rather than indexing so an empty
    # paragraph does not raise IndexError here.
    if para.startswith(("    ", "\t")):
        return True

    # If the text begins with only two "--", possibly preceded by
    # whitespace, that's an emdash; so wrap.
    stripped = para.lstrip()
    if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
        return False

    # I'm not sure what this is for; I thought it was to detect lists,
    # but there's a <br>-inside-<span> case in one of the tests that
    # also depends upon it.
    if stripped[0:1] in ("-", "*") and stripped[0:2] != "**":
        return not wrap_list_items

    # If text contains a pipe character it is likely a table.
    if not wrap_tables and config.RE_TABLE.search(para):
        return True

    # If the text begins with a single -, *, or +, followed by a space,
    # or an integer, followed by a ., followed by a space (in either
    # case optionally preceded by whitespace), it's a list; don't wrap.
    return bool(
        config.RE_ORDERED_LIST_MATCHER.match(stripped)
        or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
    )
197
198
199def escape_md(text: str) -> str:

Callers 1

optwrapMethod · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…