()
| 392 | print(result.markdown) |
| 393 | |
| 394 | async def speed_comparison(): |
| 395 | # print("\n--- Speed Comparison ---") |
| 396 | # print("Firecrawl (simulated):") |
| 397 | # print("Time taken: 7.02 seconds") |
| 398 | # print("Content length: 42074 characters") |
| 399 | # print("Images found: 49") |
| 400 | # print() |
| 401 | # Simulated Firecrawl performance |
| 402 | from firecrawl import FirecrawlApp |
| 403 | app = FirecrawlApp(api_key=os.environ['FIRECRAWL_API_KEY']) |
| 404 | start = time.time() |
| 405 | scrape_status = app.scrape_url( |
| 406 | 'https://www.nbcnews.com/business', |
| 407 | params={'formats': ['markdown', 'html']} |
| 408 | ) |
| 409 | end = time.time() |
| 410 | print("Firecrawl (simulated):") |
| 411 | print(f"Time taken: {end - start:.2f} seconds") |
| 412 | print(f"Content length: {len(scrape_status['markdown'])} characters") |
| 413 | print(f"Images found: {scrape_status['markdown'].count('cldnry.s-nbcnews.com')}") |
| 414 | print() |
| 415 | |
| 416 | async with AsyncWebCrawler() as crawler: |
| 417 | # Crawl4AI simple crawl |
| 418 | start = time.time() |
| 419 | result = await crawler.arun( |
| 420 | url="https://www.nbcnews.com/business", |
| 421 | word_count_threshold=0, |
| 422 | bypass_cache=True, |
| 423 | verbose=False, |
| 424 | ) |
| 425 | end = time.time() |
| 426 | print("Crawl4AI (simple crawl):") |
| 427 | print(f"Time taken: {end - start:.2f} seconds") |
| 428 | print(f"Content length: {len(result.markdown)} characters") |
| 429 | print(f"Images found: {result.markdown.count('cldnry.s-nbcnews.com')}") |
| 430 | print() |
| 431 | |
| 432 | # Crawl4AI with JavaScript execution |
| 433 | start = time.time() |
| 434 | result = await crawler.arun( |
| 435 | url="https://www.nbcnews.com/business", |
| 436 | js_code=[ |
| 437 | "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();" |
| 438 | ], |
| 439 | word_count_threshold=0, |
| 440 | bypass_cache=True, |
| 441 | verbose=False, |
| 442 | ) |
| 443 | end = time.time() |
| 444 | print("Crawl4AI (with JavaScript execution):") |
| 445 | print(f"Time taken: {end - start:.2f} seconds") |
| 446 | print(f"Content length: {len(result.markdown)} characters") |
| 447 | print(f"Images found: {result.markdown.count('cldnry.s-nbcnews.com')}") |
| 448 | |
| 449 | print("\nNote on Speed Comparison:") |
| 450 | print("The speed test conducted here may not reflect optimal conditions.") |
| 451 | print("When we call Firecrawl's API, we're seeing its best performance,") |
no test coverage detected
searching dependent graphs…