Class IframeParser

tests/selenium/test_selenium.py:67–91 · view source on GitHub ↗

Extract the iframes from an html page.

Source from the content-addressed store, hash-verified

65
66
class IframeParser(HTMLParser):
    """Collect the embedded HTML of every ``<iframe>`` fed to the parser.

    The decoded document of each iframe start tag is appended, as ``bytes``,
    to ``self.iframes`` in the order the tags are encountered.
    """

    def __init__(self):
        super().__init__()
        # One ``bytes`` entry per iframe seen so far.
        self.iframes = []

    def handle_starttag(self, tag, attrs):
        """Decode and store the content of an ``<iframe>`` start tag.

        The content is looked up in ``srcdoc`` first, then in the legacy
        ``data-html`` attribute, and finally in the legacy ``src`` attribute.
        Any other tag is ignored.
        """
        if tag != "iframe":
            return

        attributes = dict(attrs)
        if "srcdoc" in attributes:
            # The document is stored verbatim in the attribute.
            payload = attributes["srcdoc"].encode()
        elif "data-html" not in attributes:
            # legacy: base64 text is whatever follows the last comma of src
            encoded = attributes["src"].rpartition(",")[2]
            payload = base64.b64decode(encoded)
        else:
            # legacy: data-html carries the document in one of two encodings
            raw = attributes["data-html"]
            if "%" not in raw[:20]:
                # legacy branca version: data-html is base64 encoded
                payload = base64.b64decode(raw)
            else:
                # newest branca version: data-html is percent-encoded
                payload = unquote(raw).encode()

        self.iframes.append(payload)

get_notebook_html · Function · 0.85

no outgoing calls

get_notebook_html · Function · 0.68