MCPcopy
hub / github.com/unclecode/crawl4ai / AsyncPlaywrightCrawlerStrategy

Class AsyncPlaywrightCrawlerStrategy

crawl4ai/async_crawler_strategy.py:65–721  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

63 pass
64
65class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
def __init__(self, use_cached_html=False, js_code=None, **kwargs):
    """Store crawler configuration; no browser is launched here.

    Args:
        use_cached_html: Stored as-is on ``self.use_cached_html``.
        js_code: Stored as-is on ``self.js_code``.
        **kwargs: Optional settings, each looked up by name:
            ``user_agent`` (defaults to a desktop Chrome 91 string),
            ``proxy`` and ``proxy_config`` (default ``None``),
            ``headless`` (default ``True``),
            ``browser_type`` (default ``"chromium"``),
            ``headers`` (default: a new empty dict),
            ``verbose`` (default ``False``),
            ``sleep_on_close`` (default ``False``).
    """
    option = kwargs.get
    default_user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    # Values taken from the explicit parameters.
    self.use_cached_html = use_cached_html
    self.js_code = js_code

    # Values taken from keyword options, with their fallbacks.
    self.user_agent = option("user_agent", default_user_agent)
    self.proxy = option("proxy")
    self.proxy_config = option("proxy_config")
    self.headless = option("headless", True)
    self.browser_type = option("browser_type", "chromium")
    self.headers = option("headers", {})
    self.verbose = option("verbose", False)
    self.sleep_on_close = option("sleep_on_close", False)

    # Session bookkeeping starts empty.
    self.sessions = {}
    self.session_ttl = 1800  # presumably seconds (30 min) -- TODO confirm

    # Both handles stay None until start() assigns them.
    self.playwright = None
    self.browser = None

    # Every known hook name is registered up front with no callback.
    self.hooks = dict.fromkeys(
        (
            'on_browser_created',
            'on_user_agent_updated',
            'on_execution_started',
            'before_goto',
            'after_goto',
            'before_return_html',
            'before_retrieve_html',
        ),
        None,
    )
94
async def __aenter__(self):
    """Enter the ``async with`` block: await ``start()`` and return this strategy."""
    startup = self.start()
    await startup
    return self
98
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Leave the ``async with`` block by awaiting ``close()``.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the block is not suppressed.
    """
    shutdown = self.close()
    await shutdown
101
102 async def start(self):
103 if self.playwright is None:
104 self.playwright = await async_playwright().start()
105 if self.browser is None:
106 browser_args = {
107 "headless": self.headless,
108 "args": [
109 "--disable-gpu",
110 "--no-sandbox",
111 "--disable-dev-shm-usage",
112 "--disable-blink-features=AutomationControlled",
113 "--disable-infobars",
114 "--window-position=0,0",
115 "--ignore-certificate-errors",
116 "--ignore-certificate-errors-spki-list",
117 # "--headless=new", # Use the new headless mode
118 ]
119 }
120
121 # Add proxy settings if a proxy is specified
122 if self.proxy:

Callers 2

main (Function) · 0.90
__init__ (Method) · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…