MCPcopy Index your code
hub / github.com/unclecode/crawl4ai / main

Function main

docs/examples/async_webcrawler_multiple_urls_example.py:10–45  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

8from crawl4ai import AsyncWebCrawler
9
async def main():
    """Crawl a fixed list of URLs with ``arun_many`` and print a summary of each.

    For every result returned by the crawler, prints either a success report
    (title, word count of the markdown, total internal + external links,
    image count) or the failure's error message, followed by a ``---``
    separator line.
    """
    # List of URLs to crawl
    urls = [
        "https://example.com",
        "https://python.org",
        "https://github.com",
        "https://stackoverflow.com",
        "https://news.ycombinator.com",
    ]

    # Set up crawling parameters
    word_count_threshold = 100

    # The context manager starts the crawler and guarantees it is closed again.
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Run the crawling process for multiple URLs
        results = await crawler.arun_many(
            urls=urls,
            word_count_threshold=word_count_threshold,
            bypass_cache=True,
            verbose=True,
        )

        # Process the results
        for result in results:
            if not result.success:
                print(f"Failed to crawl: {result.url}")
                print(f"Error: {result.error_message}")
                print("---")
                continue

            # NOTE(review): CrawlResult declares metadata/markdown as optional
            # fields (confirm against the installed crawl4ai version). Fall
            # back to empty values so one sparse result cannot raise
            # AttributeError and abort the report for the remaining URLs.
            metadata = result.metadata or {}
            markdown = result.markdown or ""
            links = result.links or {}
            media = result.media or {}

            link_count = len(links.get("internal", [])) + len(links.get("external", []))

            print(f"Successfully crawled: {result.url}")
            print(f"Title: {metadata.get('title', 'N/A')}")
            print(f"Word count: {len(markdown.split())}")
            print(f"Number of links: {link_count}")
            print(f"Number of images: {len(media.get('images', []))}")
            print("---")
46
if __name__ == "__main__":
    # Only crawl when executed as a script; asyncio.run drives the
    # coroutine to completion.
    asyncio.run(main())

Calls 2

AsyncWebCrawler · Class · 0.90
arun_many · Method · 0.80

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…