Process multiple URLs concurrently.
process_urls(urls: List[str], max_concurrent: int = 5) -> List[str]
| 124 | return "" |
| 125 | |
async def process_urls(urls: List[str], max_concurrent: int = 5) -> List[str]:
    """Fetch every URL with Playwright, then parse the fetched HTML.

    Pages are fetched through ``fetch_page`` using a small set of browser
    contexts that are shared round-robin between the URLs. The fetched
    documents are then handed to ``parse_html`` via a ``Pool``.

    Args:
        urls: URLs to process. An empty list returns ``[]`` without
            launching a browser.
        max_concurrent: Upper bound on both the number of browser contexts
            created and the number of ``fetch_page`` calls in flight at
            the same time. Must be at least 1 when ``urls`` is non-empty.

    Returns:
        One ``parse_html`` result per input URL, in the same order as
        ``urls``.

    Raises:
        ValueError: If ``urls`` is non-empty and ``max_concurrent`` is
            smaller than 1.
    """
    if not urls:
        return []
    if max_concurrent < 1:
        # Without this guard no context is created and the round-robin
        # index below would fail with an opaque ZeroDivisionError.
        raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")

    # Bounds the number of simultaneous fetches; gather() alone would start
    # every fetch at once regardless of max_concurrent.
    semaphore = asyncio.Semaphore(max_concurrent)

    async def _bounded_fetch(url, context):
        """Run fetch_page for one URL while holding a concurrency slot."""
        async with semaphore:
            return await fetch_page(url, context)

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        # Bound before the try block so the cleanup below never sees an
        # undefined name, even when context creation fails part-way.
        contexts = []
        try:
            for _ in range(min(len(urls), max_concurrent)):
                contexts.append(await browser.new_context())

            # gather() preserves argument order, so results line up with urls.
            html_contents = await asyncio.gather(
                *(
                    _bounded_fetch(url, contexts[i % len(contexts)])
                    for i, url in enumerate(urls)
                )
            )
        finally:
            try:
                for context in contexts:
                    await context.close()
            finally:
                # Always release the browser, even if closing a context fails.
                await browser.close()

    # Parse only after the browser has been released.
    # NOTE(review): pool.map blocks the event loop while parsing runs;
    # confirm that is acceptable for callers sharing this loop.
    with Pool() as pool:
        return pool.map(parse_html, html_contents)
| 156 | |
| 157 | def validate_url(url: str) -> bool: |
| 158 | """Validate if the given string is a valid URL.""" |