MCPcopy
hub / github.com/impira/docquery / WebDriver

Class WebDriver

src/docquery/web.py:30–122  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

28
29
30class WebDriver:
def __init__(self):
    """Create the wrapper and start its browser driver.

    Raises:
        ValueError: if ``WEB_AVAILABLE`` is falsy, i.e. the optional web
            dependencies could not be imported.
    """
    if not WEB_AVAILABLE:
        message = (
            "Web imports are unavailable. You must install the [web] extra and "
            "chrome or chromium system-wide."
        )
        raise ValueError(message)

    # All driver construction lives in one place so it can be repeated later.
    self._reinit_driver()
38
def _reinit_driver(self):
    """(Re)create the headless Chrome driver stored on ``self.driver``.

    If a driver from an earlier call is still attached it is quit first
    (best effort), so re-initialising does not leave the old one behind.
    """
    stale_driver = getattr(self, "driver", None)
    if stale_driver is not None:
        try:
            stale_driver.quit()
        except exceptions.WebDriverException:
            # The previous session is usually already unusable when we get
            # here; failing to close it must not block creating a new one.
            pass

    # NOTE(review): `options.headless` and `executable_path` are flagged as
    # deprecated in newer Selenium 4 releases -- confirm against the pinned
    # Selenium version before changing them.
    options = Options()
    options.headless = True
    options.add_argument("--window-size=1920,1200")

    # os.geteuid is only available on Unix; on platforms without it we
    # simply skip the root check instead of raising AttributeError.
    geteuid = getattr(os, "geteuid", None)
    if geteuid is not None and geteuid() == 0:
        # Running as root (euid 0): start Chrome without its sandbox.
        options.add_argument("--no-sandbox")

    self.driver = webdriver.Chrome(
        options=options,
        executable_path=ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(),
    )
49
def get(self, page, retry=True):
    """Point the driver at ``page``.

    If the driver reports an invalid session, the driver is rebuilt and the
    navigation is attempted one more time. A second failure (or any failure
    when ``retry`` is false) propagates to the caller.
    """
    try:
        self.driver.get(page)
    except exceptions.InvalidSessionIdException:
        if not retry:
            raise
        # Forgive an invalid session once: rebuild the driver and try again
        # with retries disabled so this cannot loop.
        self._reinit_driver()
        return self.get(page, retry=False)
60
def get_html(self, html):
    """Load a raw HTML string into the browser through a ``data:`` URL.

    The markup is percent-encoded before being appended to the URL.
    Without that, a ``#`` in the markup (anchors, hex colours) is read as
    the start of the URL fragment and cuts the document short, and literal
    ``%xx`` sequences are decoded into other characters.

    Args:
        html: the HTML document text to display.
    """
    # https://stackoverflow.com/questions/22538457/put-a-string-with-html-javascript-into-selenium-webdriver
    from urllib.parse import quote

    self.get("data:text/html;charset=utf-8," + quote(html))
64
def find_word_boxes(self):
    """Run ``computeBoundingBoxes(document.body)`` in the current page.

    The script sent to the browser is ``self.lib_js`` followed by the call;
    whatever the script returns is handed back unchanged.
    """
    # Assumes the driver has been pointed at the right website already
    script = (
        self.lib_js
        + """
        return computeBoundingBoxes(document.body);
    """
    )
    return self.driver.execute_script(script)
73
74 # TODO: Handle horizontal scrolling
75 def scroll_and_screenshot(self):
76 tops = []
77 images = []
78 dims = self.driver.execute_script(
79 self.lib_js
80 + """
81 return computeViewport()
82 """
83 )
84
85 view_height = dims["vh"]
86 doc_height = dims["dh"]
87

Callers 1

get_webdriverFunction · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected