MCPcopy Index your code
hub / github.com/grapeot/devin.cursorrules / process_urls

Function process_urls

tools/web_scraper.py:126–155  ·  view source on GitHub ↗

Process multiple URLs concurrently.

(urls: List[str], max_concurrent: int = 5)

Source from the content-addressed store, hash-verified

124 return ""
125
async def process_urls(urls: List[str], max_concurrent: int = 5) -> List[str]:
    """Fetch every URL in a headless Chromium browser and parse the HTML.

    Args:
        urls: URLs to fetch. An empty list returns ``[]`` immediately,
            without launching a browser or a worker pool.
        max_concurrent: Upper bound on the number of browser contexts that
            are created. URLs are assigned to contexts round-robin. Note
            that all fetches are still scheduled together through
            ``asyncio.gather``; this value caps contexts, not the number of
            in-flight fetches.

    Returns:
        One ``parse_html`` result per URL, in the same order as ``urls``.

    Raises:
        ValueError: If ``urls`` is non-empty and ``max_concurrent`` is
            smaller than 1 (there would be no context to fetch with).
    """
    if not urls:
        return []
    if max_concurrent < 1:
        raise ValueError(
            f"max_concurrent must be at least 1, got {max_concurrent}"
        )

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        # Bound before the try so the cleanup below can always iterate it,
        # even when creating one of the contexts fails part-way through.
        contexts = []
        try:
            n_contexts = min(len(urls), max_concurrent)
            for _ in range(n_contexts):
                contexts.append(await browser.new_context())

            # Spread the URLs over the available contexts round-robin.
            tasks = [
                fetch_page(url, contexts[i % n_contexts])
                for i, url in enumerate(urls)
            ]
            html_contents = await asyncio.gather(*tasks)

            # Parse the fetched pages in worker processes; pool.map blocks
            # until every page has been parsed and keeps the input order.
            with Pool() as pool:
                return pool.map(parse_html, html_contents)
        finally:
            # Close the browser even if closing a context raises.
            try:
                for context in contexts:
                    await context.close()
            finally:
                await browser.close()
156
157def validate_url(url: str) -> bool:
158 """Validate if the given string is a valid URL."""

Callers 2

test_process_urlsMethod · 0.90
mainFunction · 0.85

Calls 1

fetch_pageFunction · 0.85

Tested by 1

test_process_urlsMethod · 0.72