MCPcopy Index your code
hub / github.com/abi/screenshot-to-code / extract_html_content

Function extract_html_content

backend/codegen/utils.py:4–33  ·  view source on GitHub ↗
(text: str)

Source from the content-addressed store, hash-verified

2
3
def extract_html_content(text: str) -> str:
    """Extract the HTML document from a piece of generated text.

    The steps are tried in order:

    1. If the text contains a ``<file path="...">...</file>`` wrapper, the
       content of the first wrapper is extracted and processed recursively.
    2. Markdown code fences (an opening ```` ```html ```` / ```` ```htm ````
       at the start of a line, and ```` ``` ```` at the end of a line) are
       removed.
    3. The first ``<!DOCTYPE html ...>`` through ``</html>`` span is
       returned if one exists.
    4. Otherwise the first ``<html ...>`` through ``</html>`` span is
       returned.
    5. Otherwise a diagnostic is printed and the fence-stripped text is
       returned as is.

    All tag matching is case-insensitive, so ``<HTML>...</HTML>`` is
    handled the same way as ``<html>...</html>``.

    Args:
        text: Raw text that may contain an HTML document, optionally
            wrapped in a ``<file>`` element and/or markdown code fences.

    Returns:
        The extracted HTML span, or the fence-stripped input when no
        ``<html>`` element is found.
    """
    file_match = re.search(
        r'<file\s+path="[^"]+">\s*(.*?)\s*</file>',
        text,
        re.DOTALL | re.IGNORECASE,
    )
    if file_match:
        # Unwrap the <file> element and run the remaining steps on its body.
        return extract_html_content(file_match.group(1).strip())

    # Strip markdown code fences if present.
    text = re.sub(r"^```html?\s*\n?", "", text, flags=re.MULTILINE)
    text = re.sub(r"\n?```\s*$", "", text, flags=re.MULTILINE)

    # Prefer a match that keeps the DOCTYPE together with the <html> element.
    match_with_doctype = re.search(
        r"(<!DOCTYPE\s+html[^>]*>.*?<html.*?>.*?</html>)",
        text,
        re.DOTALL | re.IGNORECASE,
    )
    if match_with_doctype:
        return match_with_doctype.group(1)

    # Fall back to just the <html> element. This search is case-insensitive
    # like the DOCTYPE search above, so an upper-case <HTML> document is not
    # mistaken for "no HTML found".
    match = re.search(
        r"(<html.*?>.*?</html>)",
        text,
        re.DOTALL | re.IGNORECASE,
    )
    if match:
        return match.group(1)

    # No <html> element at all: report it and hand back the text unchanged
    # (apart from the fence stripping above).
    print(
        "[HTML Extraction] No <html> tags found in the generated content"
    )
    return text

Calls

no outgoing calls