MCPcopy
hub / github.com/unclecode/crawl4ai / using_crawler_hooks

Function using_crawler_hooks

docs/examples/quickstart_sync.py:201–263  ·  view source on GitHub ↗
(crawler)

Source from the content-addressed store, hash-verified

199 print_result(result)
200
201def using_crawler_hooks(crawler):
202 # Example usage of the hooks for authentication and setting a cookie
203 def on_driver_created(driver):
204 print("[HOOK] on_driver_created")
205 # Example customization: maximize the window
206 driver.maximize_window()
207
208 # Example customization: logging in to a hypothetical website
209 driver.get('https://example.com/login')
210
211 from selenium.webdriver.support.ui import WebDriverWait
212 from selenium.webdriver.common.by import By
213 from selenium.webdriver.support import expected_conditions as EC
214
215 WebDriverWait(driver, 10).until(
216 EC.presence_of_element_located((By.NAME, 'username'))
217 )
218 driver.find_element(By.NAME, 'username').send_keys('testuser')
219 driver.find_element(By.NAME, 'password').send_keys('password123')
220 driver.find_element(By.NAME, 'login').click()
221 WebDriverWait(driver, 10).until(
222 EC.presence_of_element_located((By.ID, 'welcome'))
223 )
224 # Add a custom cookie
225 driver.add_cookie({'name': 'test_cookie', 'value': 'cookie_value'})
226 return driver
227
228
229 def before_get_url(driver):
230 print("[HOOK] before_get_url")
231 # Example customization: add a custom header
232 # Enable Network domain for sending headers
233 driver.execute_cdp_cmd('Network.enable', {})
234 # Add a custom header
235 driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': {'X-Test-Header': 'test'}})
236 return driver
237
238 def after_get_url(driver):
239 print("[HOOK] after_get_url")
240 # Example customization: log the URL
241 print(driver.current_url)
242 return driver
243
244 def before_return_html(driver, html):
245 print("[HOOK] before_return_html")
246 # Example customization: log the HTML
247 print(len(html))
248 return driver
249
250 cprint("\n🔗 [bold cyan]Using Crawler Hooks: Let's see how we can customize the crawler using hooks![/bold cyan]", True)
251
252 crawler_strategy = LocalSeleniumCrawlerStrategy(verbose=True)
253 crawler_strategy.set_hook('on_driver_created', on_driver_created)
254 crawler_strategy.set_hook('before_get_url', before_get_url)
255 crawler_strategy.set_hook('after_get_url', after_get_url)
256 crawler_strategy.set_hook('before_return_html', before_return_html)
257
258 crawler = WebCrawler(verbose=True, crawler_strategy=crawler_strategy)

Callers

nothing calls this directly

Calls 7

set_hook · Method · 0.95
warmup · Method · 0.95
run · Method · 0.95
WebCrawler · Class · 0.90
cprint · Function · 0.85
print_result · Function · 0.85

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…