# Element names on the black list (HTML/SVG tag names plus the "::marker"
# pseudo-element). Stored as a set so membership checks are hash lookups.
black_listed_elements = {
    "html",
    "head",
    "title",
    "meta",
    "iframe",
    "body",
    "script",
    "style",
    "path",
    "svg",
    "br",
    "::marker",
}
| 156 | |
| 157 | class Crawler: |
def __init__(self):
    """Start Playwright, launch headless Chromium and open one page.

    Sets ``self.playwright`` (driver handle), ``self.browser``,
    ``self.page`` (viewport 1280x1080), and initialises ``self.client``
    and ``self.page_element_buffer`` to empty placeholders.
    """
    # Keep the driver handle instead of discarding it inside a call chain:
    # without a reference there is no way to call ``stop()`` on it later.
    self.playwright = sync_playwright().start()
    self.browser = self.playwright.chromium.launch(headless=True)

    self.page = self.browser.new_page()
    self.page.set_viewport_size({"width": 1280, "height": 1080})

    # These are (re)assigned by go_to_page(); defining them here means the
    # attributes exist even before the first navigation.
    self.client = None
    self.page_element_buffer = {}
| 169 | |
def screenshot(self, project_name):
    """Save a PNG of the current page and record it in the agent state.

    Args:
        project_name: Forwarded to ``AgentState().add_to_current_state``
            together with the newly built state.

    Returns:
        The path of the PNG file that was written.
    """
    target_dir = Config().get_screenshots_dir()

    # Ask the page for its own location; the title is fetched by the same
    # script but is not used below.
    location_info = self.page.evaluate("() => { return { url: document.location.href, title: document.title } }")
    current_url = location_info["url"]

    # 20 random bytes, hex-encoded, give the file its name.
    png_path = os.path.join(target_dir, f"{os.urandom(20).hex()}.png")

    self.page.emulate_media(media="screen")
    self.page.screenshot(path=png_path)

    state = AgentState().new_state()
    state["internal_monologue"] = "Browsing the web right now..."
    browser_session = state["browser_session"]
    browser_session["url"] = current_url
    browser_session["screenshot"] = png_path
    AgentState().add_to_current_state(project_name, state)

    return png_path
| 189 | |
def go_to_page(self, url):
    """Navigate the page to *url* and reset the per-page state.

    ``http://`` is prepended when *url* does not start with a
    ``scheme://`` prefix. After navigation a new CDP session is stored on
    ``self.client`` and ``self.page_element_buffer`` is emptied.

    Args:
        url: Address to open, with or without a scheme. Surrounding
            whitespace is ignored.
    """
    url = url.strip()

    # Only a scheme at the very start counts. Searching for "://" anywhere
    # would mistake "example.com/?next=https://other" for an absolute URL
    # and skip the prefix, which then fails to navigate.
    scheme, separator, _ = url.partition("://")
    has_scheme = (
        bool(separator)
        and scheme[:1].isalpha()
        and all(ch.isalnum() or ch in "+-." for ch in scheme)
    )

    self.page.goto(url=url if has_scheme else "http://" + url)
    self.client = self.page.context.new_cdp_session(self.page)
    self.page_element_buffer = {}
| 194 | |
def scroll(self, direction):
    """Scroll the page by one window height.

    Args:
        direction: ``"up"`` or ``"down"``. Any other value does nothing.
    """
    if direction == "up":
        script = "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;"
    elif direction == "down":
        script = "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;"
    else:
        # Unrecognised direction: leave the page where it is.
        return

    self.page.evaluate(script)
| 204 | |
| 205 | def click(self, id): |
| 206 | # Inject javascript into the page which removes the target= attribute from all links |
| 207 | js = """ |
| 208 | links = document.getElementsByTagName("a"); |
| 209 | for (var i = 0; i < links.length; i++) { |
| 210 | links[i].removeAttribute("target"); |
| 211 | } |
| 212 | """ |
| 213 | self.page.evaluate(js) |
| 214 | |