(self)
| 39 | await asyncio.sleep(random.randint(5, 10)) |
| 40 | |
async def crawl(self):
    """Launch the browser, open ``self.url`` and hand off to ``send_key``.

    The launched browser and its new tab are stored on ``self.browser`` and
    ``self.page``. Before navigating, a set of JavaScript overrides is
    registered so that every document loaded in the tab reports a
    non-automated ``navigator`` state.

    The overrides are installed with ``evaluateOnNewDocument`` *before*
    ``goto``. Running them with ``evaluate`` after the page has loaded only
    patches the current document: scripts that run during page load would
    still see the unpatched values, and the patches would be dropped on the
    next navigation.
    """
    # headless is False for test environments; production deployments can
    # switch this to a headless browser.
    self.browser = await launch({
        'headless': False,
        'userDataDir': cache_dir,
        'defaultViewport': {'width': 1440, 'height': 1000},
        'args': ['--no-sandbox'],
    })
    self.page = await self.browser.newPage()

    # Spoof the browser state so automation-detection scripts do not flag
    # the session.
    stealth_scripts = (
        "() =>{ Object.defineProperties(navigator,{ webdriver:"
        "{ get: () => false } }) }",
        "() =>{ window.navigator.chrome = { runtime: {}, }; }",
        "() =>{ Object.defineProperty(navigator, 'languages', "
        "{ get: () => ['en-US', 'en'] }); }",
        "() =>{ Object.defineProperty(navigator, 'plugins', { "
        "get: () => [1, 2, 3, 4, 5,6], }); }",
    )
    # Register first, navigate second: each script then runs in every new
    # document before any of the page's own scripts execute.
    for script in stealth_scripts:
        await self.page.evaluateOnNewDocument(script)

    await self.page.goto(self.url)
    await self.send_key()
| 66 | |
| 67 | |
| 68 | def main(): |
no test coverage detected