(crawler)
| 199 | print_result(result) |
| 200 | |
| 201 | def using_crawler_hooks(crawler): |
| 202 | # Example usage of the hooks for authentication and setting a cookie |
def on_driver_created(driver):
    """Hook for a freshly created driver: log in and plant a cookie.

    Maximizes the browser window, opens the login page of a hypothetical
    site, fills in demo credentials, submits the form, waits for the
    element with id ``welcome`` and finally adds a custom cookie.

    Args:
        driver: The driver object passed to the hook; it is used through
            Selenium-style calls (``get``, ``find_element``, ``add_cookie``).

    Returns:
        The same ``driver`` object that was passed in.
    """
    print("[HOOK] on_driver_created")

    # Example customization: maximize the window
    driver.maximize_window()

    # Example customization: logging in to a hypothetical website
    login_url = 'https://example.com/login'
    driver.get(login_url)

    # Selenium helpers are imported locally, at the point they are needed.
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC

    wait = WebDriverWait(driver, 10)

    # Block until the login form is present before typing into it.
    wait.until(EC.presence_of_element_located((By.NAME, 'username')))

    credentials = (
        ('username', 'testuser'),
        ('password', 'password123'),
    )
    for field_name, text in credentials:
        driver.find_element(By.NAME, field_name).send_keys(text)
    driver.find_element(By.NAME, 'login').click()

    # The 'welcome' element is what this example waits for after submitting.
    wait.until(EC.presence_of_element_located((By.ID, 'welcome')))

    # Add a custom cookie
    cookie = {'name': 'test_cookie', 'value': 'cookie_value'}
    driver.add_cookie(cookie)
    return driver
| 227 | |
| 228 | |
def before_get_url(driver):
    """Hook run before a URL is fetched: attach a custom request header.

    Sends two commands through ``driver.execute_cdp_cmd``: first
    ``Network.enable`` (the Network domain is enabled for sending headers),
    then ``Network.setExtraHTTPHeaders`` carrying ``X-Test-Header: test``.

    Args:
        driver: The driver object passed to the hook; must provide
            ``execute_cdp_cmd(cmd, params)``.

    Returns:
        The same ``driver`` object that was passed in.
    """
    print("[HOOK] before_get_url")

    extra_headers = {'X-Test-Header': 'test'}
    # Order matters: enable the Network domain, then set the header.
    cdp_commands = (
        ('Network.enable', {}),
        ('Network.setExtraHTTPHeaders', {'headers': extra_headers}),
    )
    for command, params in cdp_commands:
        driver.execute_cdp_cmd(command, params)
    return driver
| 237 | |
def after_get_url(driver):
    """Hook run after a URL is fetched: log the driver's current URL.

    Args:
        driver: The driver object passed to the hook; its ``current_url``
            attribute is read and printed.

    Returns:
        The same ``driver`` object that was passed in.
    """
    print("[HOOK] after_get_url")
    # Example customization: log the URL
    landed_on = driver.current_url
    print(landed_on)
    return driver
| 243 | |
def before_return_html(driver, html):
    """Hook run before the HTML is returned: log the size of the HTML.

    Args:
        driver: The driver object passed to the hook; returned unchanged.
        html: The page HTML; only its length is printed.

    Returns:
        The same ``driver`` object that was passed in.
    """
    print("[HOOK] before_return_html")
    # Example customization: log the HTML (its length, not its content)
    html_length = len(html)
    print(html_length)
    return driver
| 249 | |
| 250 | cprint("\n🔗 [bold cyan]Using Crawler Hooks: Let's see how we can customize the crawler using hooks![/bold cyan]", True) |
| 251 | |
| 252 | crawler_strategy = LocalSeleniumCrawlerStrategy(verbose=True) |
| 253 | crawler_strategy.set_hook('on_driver_created', on_driver_created) |
| 254 | crawler_strategy.set_hook('before_get_url', before_get_url) |
| 255 | crawler_strategy.set_hook('after_get_url', after_get_url) |
| 256 | crawler_strategy.set_hook('before_return_html', before_return_html) |
| 257 | |
| 258 | crawler = WebCrawler(verbose=True, crawler_strategy=crawler_strategy) |
nothing calls this directly
no test coverage detected
searching dependent graphs…