MCPcopy
hub / github.com/nat/natbot / Crawler

Class Crawler

natbot.py:163–540  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

# Lower-case DOM node names on the blacklist: document scaffolding, script and
# style containers, SVG internals, line breaks and the ::marker pseudo-element.
# NOTE(review): presumably consulted by Crawler.crawl to skip nodes; that body
# is not visible in this excerpt, so confirm before relying on it.
black_listed_elements = {
    "html",
    "head",
    "title",
    "meta",
    "iframe",
    "body",
    "script",
    "style",
    "path",
    "svg",
    "br",
    "::marker",
}
162
163class Crawler:
def __init__(self):
    """Launch a visible Chromium window and open a single page in it.

    Sets three attributes:
      * ``self.playwright`` - the started Playwright driver handle.
      * ``self.browser``    - the launched Chromium browser.
      * ``self.page``       - a new page with a 1280x1080 viewport.
    """
    # Keep the driver handle instead of discarding it: a started
    # ``sync_playwright()`` can only be shut down through ``.stop()`` on this
    # object, so dropping the reference makes a clean shutdown impossible.
    self.playwright = sync_playwright().start()

    # headless=False: the browser window is shown while it is being driven.
    self.browser = self.playwright.chromium.launch(headless=False)

    self.page = self.browser.new_page()
    self.page.set_viewport_size({"width": 1280, "height": 1080})
175
def go_to_page(self, url):
    """Navigate the page to ``url`` and reset the per-page state.

    Args:
        url: Address to open. When it does not begin with a ``scheme://``
            prefix, ``http://`` is prepended. Surrounding whitespace is
            ignored.

    Side effects:
        Opens a new CDP session for the page (``self.client``) and replaces
        ``self.page_element_buffer`` with an empty dict.
    """
    import re

    url = url.strip()
    # Anchor the scheme test at the start of the string. A plain
    # ``"://" in url`` is also true for a scheme-less address that merely
    # carries an absolute URL in its query (``example.com/r?next=https://x``),
    # which would then be passed to goto() without any scheme at all.
    has_scheme = re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", url) is not None
    target = url if has_scheme else "http://" + url

    self.page.goto(url=target)
    self.client = self.page.context.new_cdp_session(self.page)
    self.page_element_buffer = {}
180
def scroll(self, direction):
    """Scroll the page by one viewport height.

    Args:
        direction: ``"up"`` or ``"down"``. Any other value leaves the page
            untouched.
    """
    # Pick the script first, then run it once; both scripts move the
    # scrolling element (falling back to <body>) by window.innerHeight.
    script = None
    if direction == "up":
        script = "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;"
    elif direction == "down":
        script = "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;"

    if script is not None:
        self.page.evaluate(script)
190
def click(self, id):
    """Click the centre of the buffered element identified by ``id``.

    Args:
        id: Key into ``self.page_element_buffer``; converted with ``int()``,
            so numeric strings are accepted. A value that cannot be converted
            is treated the same as an unknown element.

    Prints ``"Could not find element"`` and does not click when no element is
    buffered under ``id``.
    """
    # Remove the target= attribute from every link before clicking
    # (presumably so a clicked link opens in this page instead of a new tab).
    js = """
    links = document.getElementsByTagName("a");
    for (var i = 0; i < links.length; i++) {
        links[i].removeAttribute("target");
    }
    """
    self.page.evaluate(js)

    try:
        key = int(id)
    except (TypeError, ValueError):
        # A non-numeric id cannot name a buffered element; report it through
        # the normal "not found" path rather than raising.
        element = None
    else:
        element = self.page_element_buffer.get(key)

    if not element:
        print("Could not find element")
        return

    self.page.mouse.click(element.get("center_x"), element.get("center_y"))
209
def type(self, id, text):
    """Click element ``id`` and then send ``text`` through the keyboard.

    The text is typed whether or not the click located an element, because
    the click step reports nothing back to this method.
    """
    self.click(id)
    keyboard = self.page.keyboard
    keyboard.type(text)
213
def enter(self):
    """Press the Enter key on the page's keyboard."""
    keyboard = self.page.keyboard
    keyboard.press("Enter")
216
217 def crawl(self):
218 page = self.page
219 page_element_buffer = self.page_element_buffer
220 start = time.time()

Callers 1

natbot.py · File · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected