Function process_urls

tools/web_scraper.py:126–155 · view source on GitHub ↗

Process multiple URLs concurrently.

(urls: List[str], max_concurrent: int = 5)

Source from the content-addressed store, hash-verified

124	return ""
125
async def process_urls(urls: List[str], max_concurrent: int = 5) -> List[str]:
    """Fetch and parse multiple URLs concurrently.

    Every URL is fetched with ``fetch_page`` using one of up to
    ``max_concurrent`` browser contexts (assigned round-robin by position in
    ``urls``). The fetched HTML is then handed to ``parse_html`` through a
    ``multiprocessing`` pool.

    Args:
        urls: URLs to process.
        max_concurrent: Upper bound on the number of browser contexts
            created and on the number of fetches in flight at once. Must be
            at least 1 when ``urls`` is non-empty.

    Returns:
        One ``parse_html`` result per URL, in the same order as ``urls``.
        An empty ``urls`` returns ``[]`` without launching a browser.

    Raises:
        ValueError: If ``urls`` is non-empty and ``max_concurrent`` is
            smaller than 1.
    """
    # Nothing to do: skip the cost of starting a browser and a process pool.
    if not urls:
        return []

    # A non-positive limit would create zero contexts and make the
    # round-robin index below divide by zero; fail with a clear message.
    if max_concurrent < 1:
        raise ValueError(
            f"max_concurrent must be at least 1, got {max_concurrent}"
        )

    n_contexts = min(len(urls), max_concurrent)

    # Sharing contexts alone does not bound concurrency, because gather()
    # starts every coroutine at once. The semaphore enforces the limit.
    fetch_slots = asyncio.Semaphore(n_contexts)

    async def _fetch_bounded(url, context):
        """Run fetch_page once a concurrency slot is free."""
        async with fetch_slots:
            return await fetch_page(url, context)

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        # Bound before the try so the cleanup below never sees an undefined
        # name, and so contexts created before a failure are still closed.
        contexts = []
        try:
            for _ in range(n_contexts):
                contexts.append(await browser.new_context())

            # gather() preserves argument order, so results line up with urls.
            html_contents = await asyncio.gather(
                *(
                    _fetch_bounded(url, contexts[i % n_contexts])
                    for i, url in enumerate(urls)
                )
            )

            # NOTE(review): pool.map blocks the event loop until parsing is
            # done; all fetches have completed by this point.
            with Pool() as pool:
                results = pool.map(parse_html, html_contents)

            return results
        finally:
            # Close the browser even if closing one of the contexts fails.
            try:
                for context in contexts:
                    await context.close()
            finally:
                await browser.close()
156
157	def validate_url(url: str) -> bool:
158	"""Validate if the given string is a valid URL."""

test_process_urlsMethod · 0.90

mainFunction · 0.85

fetch_pageFunction · 0.85

test_process_urlsMethod · 0.72