MCP copy
hub / github.com/nat/natbot / crawl

Method crawl

natbot.py:217–540  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

215 self.page.keyboard.press("Enter")
216
217 def crawl(self):
218 page = self.page
219 page_element_buffer = self.page_element_buffer
220 start = time.time()
221
222 page_state_as_text = []
223
224 device_pixel_ratio = page.evaluate("window.devicePixelRatio")
225 if platform == "darwin" and device_pixel_ratio == 1: # lies
226 device_pixel_ratio = 2
227
228 win_scroll_x = page.evaluate("window.scrollX")
229 win_scroll_y = page.evaluate("window.scrollY")
230 win_upper_bound = page.evaluate("window.pageYOffset")
231 win_left_bound = page.evaluate("window.pageXOffset")
232 win_width = page.evaluate("window.screen.width")
233 win_height = page.evaluate("window.screen.height")
234 win_right_bound = win_left_bound + win_width
235 win_lower_bound = win_upper_bound + win_height
236 document_offset_height = page.evaluate("document.body.offsetHeight")
237 document_scroll_height = page.evaluate("document.body.scrollHeight")
238
239# percentage_progress_start = (win_upper_bound / document_scroll_height) * 100
240# percentage_progress_end = (
241# (win_height + win_upper_bound) / document_scroll_height
242# ) * 100
243 percentage_progress_start = 1
244 percentage_progress_end = 2
245
246 page_state_as_text.append(
247 {
248 "x": 0,
249 "y": 0,
250 "text": "[scrollbar {:0.2f}-{:0.2f}%]".format(
251 round(percentage_progress_start, 2), round(percentage_progress_end)
252 ),
253 }
254 )
255
256 tree = self.client.send(
257 "DOMSnapshot.captureSnapshot",
258 {"computedStyles": [], "includeDOMRects": True, "includePaintOrder": True},
259 )
260 strings = tree["strings"]
261 document = tree["documents"][0]
262 nodes = document["nodes"]
263 backend_node_id = nodes["backendNodeId"]
264 attributes = nodes["attributes"]
265 node_value = nodes["nodeValue"]
266 parent = nodes["parentIndex"]
267 node_types = nodes["nodeType"]
268 node_names = nodes["nodeName"]
269 is_clickable = set(nodes["isClickable"]["index"])
270
271 text_value = nodes["textValue"]
272 text_value_index = text_value["index"]
273 text_value_values = text_value["value"]
274

Callers 1

natbot.py · File · 0.80

Calls

no outgoing calls

Tested by

no test coverage detected