(crawler)
| 56 | print_result(result) |
| 57 | |
def understanding_parameters(crawler):
    """Show the effect of ``bypass_cache`` by crawling one URL twice.

    The same URL is fetched through ``crawler.run`` two times: first with
    default arguments, then with ``bypass_cache=True``. After each crawl the
    elapsed time is reported via ``cprint`` and the result is passed to
    ``print_result``.

    Args:
        crawler: Object exposing ``run(url=..., bypass_cache=...)``.
    """
    # NOTE(review): the banner below also names 'include_raw_html', but this
    # function never passes that parameter -- confirm whether the banner or
    # the demo should change.
    cprint("\n🧠 [bold cyan]Understanding 'bypass_cache' and 'include_raw_html' parameters:[/bold cyan]")
    cprint("By default, Crawl4ai caches the results of your crawls. This means that subsequent crawls of the same URL will be much faster! Let's see this in action.")

    # First crawl: default arguments, so caching (as described in the message
    # above) is left enabled.
    cprint("1️⃣ First crawl (caches the result):", True)
    # perf_counter() is monotonic; time.time() can jump if the system clock
    # is adjusted mid-crawl, which would corrupt the reported duration.
    start_time = time.perf_counter()
    result = crawler.run(url="https://www.nbcnews.com/business")
    end_time = time.perf_counter()
    cprint(f"[LOG] 📦 [bold yellow]First crawl took {end_time - start_time} seconds and result (from cache):[/bold yellow]")
    print_result(result)

    # Second crawl: same URL, but bypass_cache=True asks for a fresh crawl.
    cprint("2️⃣ Second crawl (Force to crawl again):", True)
    start_time = time.perf_counter()
    result = crawler.run(url="https://www.nbcnews.com/business", bypass_cache=True)
    end_time = time.perf_counter()
    cprint(f"[LOG] 📦 [bold yellow]Second crawl took {end_time - start_time} seconds and result (forced to crawl):[/bold yellow]")
    print_result(result)
| 77 | |
| 78 | def add_chunking_strategy(crawler): |
| 79 | # Adding a chunking strategy: RegexChunking |
no test coverage detected
searching dependent graphs…