Function main

docs/examples/async_webcrawler_multiple_urls_example.py:10–45 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

8	from crawl4ai import AsyncWebCrawler
9
async def main():
    """Crawl a fixed list of URLs with one crawler and print a report per URL.

    All URLs are submitted together through ``AsyncWebCrawler.arun_many``.
    For each returned result, either a short summary (title, word count,
    link count, image count) or the failure message is printed, followed
    by a ``---`` separator line.

    Returns:
        None. All output goes to stdout.
    """
    # List of URLs to crawl
    urls = [
        "https://example.com",
        "https://python.org",
        "https://github.com",
        "https://stackoverflow.com",
        "https://news.ycombinator.com",
    ]

    # Set up crawling parameters
    word_count_threshold = 100

    # Initialize the AsyncWebCrawler; the context manager handles its
    # startup and shutdown around the crawl.
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Run the crawling process for multiple URLs
        results = await crawler.arun_many(
            urls=urls,
            word_count_threshold=word_count_threshold,
            bypass_cache=True,
            verbose=True,
        )

        # Process the results
        for result in results:
            if not result.success:
                print(f"Failed to crawl: {result.url}")
                print(f"Error: {result.error_message}")
                print("---")
                continue

            # NOTE(review): crawl4ai's CrawlResult model declares fields such
            # as ``metadata`` and ``markdown`` as optional -- confirm for the
            # installed version. Falling back to empty values keeps one
            # sparse result from raising AttributeError and aborting the
            # report for every remaining URL.
            metadata = result.metadata or {}
            markdown = result.markdown or ""
            links = result.links or {}
            media = result.media or {}

            link_count = len(links.get('internal', [])) + len(links.get('external', []))

            print(f"Successfully crawled: {result.url}")
            print(f"Title: {metadata.get('title', 'N/A')}")
            print(f"Word count: {len(markdown.split())}")
            print(f"Number of links: {link_count}")
            print(f"Number of images: {len(media.get('images', []))}")
            print("---")
46
if __name__ == "__main__":
    # Script entry point: build the crawl coroutine and drive it to
    # completion with asyncio.run.
    crawl_job = main()
    asyncio.run(crawl_job)

async_webcrawler_multiple_urls_example.pyFile · 0.70

AsyncWebCrawlerClass · 0.90

arun_manyMethod · 0.80

no test coverage detected

searching dependent graphs…