| 28 | |
| 29 | |
| 30 | class WebDriver: |
def __init__(self):
    """Verify the web dependencies are usable, then create the driver.

    Raises:
        ValueError: when the module-level ``WEB_AVAILABLE`` flag is falsy.
    """
    if WEB_AVAILABLE:
        # All driver construction lives in _reinit_driver so it can be
        # repeated later without going through __init__ again.
        self._reinit_driver()
    else:
        message = (
            "Web imports are unavailable. You must install the [web] extra and chrome or chromium system-wide."
        )
        raise ValueError(message)
| 38 | |
def _reinit_driver(self):
    """(Re)create the headless Chrome driver and store it on ``self.driver``.

    Called from ``__init__`` and again from ``get`` when the session has
    become invalid. A driver left over from an earlier call is shut down
    first so that repeated re-initialisation does not leak driver/browser
    processes.
    """
    # Best-effort shutdown of the previous driver (absent on the first call).
    stale = getattr(self, "driver", None)
    if stale is not None:
        try:
            stale.quit()
        except Exception:
            # The old session is usually already dead -- that is why we are
            # re-initialising -- so a failed quit must not prevent creating
            # the replacement driver.
            pass

    options = Options()
    options.headless = True
    options.add_argument("--window-size=1920,1200")

    # os.geteuid only exists on Unix; skip the root check where it is missing
    # instead of failing with AttributeError.
    geteuid = getattr(os, "geteuid", None)
    if geteuid is not None and geteuid() == 0:
        # Running as root: disable the sandbox (presumably because Chrome
        # will not launch sandboxed as root -- confirm against Chrome docs).
        options.add_argument("--no-sandbox")

    self.driver = webdriver.Chrome(
        options=options,
        executable_path=ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(),
    )
| 49 | |
def get(self, page, retry=True):
    """Navigate the underlying driver to ``page``.

    If the driver reports an invalid session, the driver is rebuilt and the
    navigation is attempted exactly one more time; a second failure (or any
    failure when ``retry`` is false) propagates to the caller.

    Args:
        page: URL handed to the driver's ``get``.
        retry: whether one invalid-session failure may be forgiven.

    Raises:
        exceptions.InvalidSessionIdException: when the session is invalid
            and no retry is left.
    """
    try:
        self.driver.get(page)
    except exceptions.InvalidSessionIdException:
        if not retry:
            raise
        # Forgive an invalid session once: rebuild the driver and try again
        # with retries disabled so this cannot loop.
        self._reinit_driver()
        return self.get(page, retry=False)
| 60 | |
def get_html(self, html):
    """Load a raw HTML string into the browser through a ``data:`` URL.

    The markup is percent-encoded before being embedded in the URL, so
    characters that are special inside URLs (``#`` starts a fragment, ``%``
    starts an escape sequence) and non-ASCII text reach the page intact
    instead of truncating or corrupting it.

    Args:
        html: HTML source to render.
    """
    # Local import keeps this method self-contained.
    from urllib.parse import quote

    # https://stackoverflow.com/questions/22538457/put-a-string-with-html-javascript-into-selenium-webdriver
    self.get("data:text/html;charset=utf-8," + quote(html))
| 64 | |
def find_word_boxes(self):
    """Run ``computeBoundingBoxes(document.body)`` in the current page.

    The script sent to the browser is ``self.lib_js`` followed by the call
    itself. ``self.lib_js`` is not assigned in the visible part of the class;
    presumably it holds the JavaScript that defines ``computeBoundingBoxes``
    -- TODO confirm where it is set.

    Returns:
        Whatever the driver's ``execute_script`` returns for that call.
    """
    # Assumes the driver has been pointed at the right website already
    return self.driver.execute_script(
        self.lib_js
        + """
        return computeBoundingBoxes(document.body);
    """
    )
| 73 | |
| 74 | # TODO: Handle horizontal scrolling |
| 75 | def scroll_and_screenshot(self): |
| 76 | tops = [] |
| 77 | images = [] |
| 78 | dims = self.driver.execute_script( |
| 79 | self.lib_js |
| 80 | + """ |
| 81 | return computeViewport() |
| 82 | """ |
| 83 | ) |
| 84 | |
| 85 | view_height = dims["vh"] |
| 86 | doc_height = dims["dh"] |
| 87 | |