| 63 | pass |
| 64 | |
| 65 | class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): |
def __init__(self, use_cached_html=False, js_code=None, **kwargs):
    """Record crawler configuration; no Playwright or browser object is created here.

    Args:
        use_cached_html: Stored unchanged on ``self.use_cached_html``.
        js_code: Stored unchanged on ``self.js_code``.
        **kwargs: Optional settings, looked up by key: ``user_agent``
            (defaults to a desktop Chrome user-agent string), ``proxy``,
            ``proxy_config``, ``headless`` (default ``True``),
            ``browser_type`` (default ``"chromium"``), ``headers``
            (default ``{}``), ``verbose`` (default ``False``) and
            ``sleep_on_close`` (default ``False``). Any other key is
            ignored by this method.
    """
    option = kwargs.get
    default_user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    self.use_cached_html = use_cached_html
    self.user_agent = option("user_agent", default_user_agent)
    self.proxy = option("proxy")
    self.proxy_config = option("proxy_config")
    self.headless = option("headless", True)
    self.browser_type = option("browser_type", "chromium")
    self.headers = option("headers", {})

    # Session bookkeeping starts empty.
    self.sessions = {}
    self.session_ttl = 1800  # presumably seconds (30 minutes) -- TODO confirm
    self.js_code = js_code
    self.verbose = option("verbose", False)

    # Both handles stay None until start() fills them in.
    self.playwright = None
    self.browser = None
    self.sleep_on_close = option("sleep_on_close", False)

    # Every known hook slot starts out unset (None).
    self.hooks = dict.fromkeys(
        (
            'on_browser_created',
            'on_user_agent_updated',
            'on_execution_started',
            'before_goto',
            'after_goto',
            'before_return_html',
            'before_retrieve_html',
        )
    )
| 94 | |
async def __aenter__(self):
    """Enter the ``async with`` block: await ``self.start()`` and return this instance."""
    startup = self.start()
    await startup
    return self
| 98 | |
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Leave the ``async with`` block by awaiting ``self.close()``.

    The exception arguments are not inspected. The method returns ``None``,
    so an exception raised inside the ``async with`` body is not suppressed.
    """
    await self.close()
    return None
| 101 | |
| 102 | async def start(self): |
| 103 | if self.playwright is None: |
| 104 | self.playwright = await async_playwright().start() |
| 105 | if self.browser is None: |
| 106 | browser_args = { |
| 107 | "headless": self.headless, |
| 108 | "args": [ |
| 109 | "--disable-gpu", |
| 110 | "--no-sandbox", |
| 111 | "--disable-dev-shm-usage", |
| 112 | "--disable-blink-features=AutomationControlled", |
| 113 | "--disable-infobars", |
| 114 | "--window-position=0,0", |
| 115 | "--ignore-certificate-errors", |
| 116 | "--ignore-certificate-errors-spki-list", |
| 117 | # "--headless=new", # Use the new headless mode |
| 118 | ] |
| 119 | } |
| 120 | |
| 121 | # Add proxy settings if a proxy is specified |
| 122 | if self.proxy: |