hub / github.com/unclecode/crawl4ai / arun_many

Method arun_many

crawl4ai/async_webcrawler.py:141–169 · view source on GitHub ↗

(
        self,
        urls: List[str],
        word_count_threshold=MIN_WORD_THRESHOLD,
        extraction_strategy: ExtractionStrategy = None,
        chunking_strategy: ChunkingStrategy = RegexChunking(),
        bypass_cache: bool = False,
        css_selector: str = None,
        screenshot: bool = False,
        user_agent: str = None,
        verbose=True,
        **kwargs,
    )

Source from the content-addressed store, hash-verified

139	return CrawlResult(url=url, html="", markdown = f"[ERROR] 🚫 arun(): Failed to crawl {url}, error: {e.msg}", success=False, error_message=e.msg)
140
async def arun_many(
    self,
    urls: List[str],
    word_count_threshold=MIN_WORD_THRESHOLD,
    extraction_strategy: ExtractionStrategy = None,
    # NOTE(review): this default is evaluated once, when the method is
    # defined, so calls that omit it share one RegexChunking instance.
    chunking_strategy: ChunkingStrategy = RegexChunking(),
    bypass_cache: bool = False,
    css_selector: str = None,
    screenshot: bool = False,
    user_agent: str = None,
    verbose=True,
    **kwargs,
) -> List[CrawlResult]:
    """Crawl every URL in ``urls`` via ``self.arun`` and collect the results.

    One ``self.arun`` coroutine is created per URL; all of them receive the
    same option values and the same extra keyword arguments. The coroutines
    are awaited together with ``asyncio.gather``.

    Args:
        urls: URLs to crawl; each one is passed as the first argument to
            ``self.arun``.
        word_count_threshold: Forwarded unchanged to ``self.arun``.
        extraction_strategy: Forwarded unchanged to ``self.arun``.
        chunking_strategy: Forwarded unchanged to ``self.arun``.
        bypass_cache: Forwarded unchanged to ``self.arun``.
        css_selector: Forwarded unchanged to ``self.arun``.
        screenshot: Forwarded unchanged to ``self.arun``.
        user_agent: Forwarded unchanged to ``self.arun``.
        verbose: Forwarded unchanged to ``self.arun``.
        **kwargs: Extra keyword arguments forwarded to every ``self.arun``
            call.

    Returns:
        The values produced by the ``self.arun`` calls, in the same order
        as ``urls``.
    """
    # Options are forwarded positionally, in exactly this order, after the URL.
    shared_options = (
        word_count_threshold,
        extraction_strategy,
        chunking_strategy,
        bypass_cache,
        css_selector,
        screenshot,
        user_agent,
        verbose,
    )
    pending = [self.arun(target, *shared_options, **kwargs) for target in urls]
    return await asyncio.gather(*pending)
170
171
172	async def aprocess_html(

Callers 4

test_multiple_urls · Function · 0.80

test_concurrent_crawling_performance · Function · 0.80

main · Function · 0.80

Calls 2

arun · Method · 0.95

RegexChunking · Class · 0.85

Tested by 3

test_multiple_urls · Function · 0.64

test_concurrent_crawling_performance · Function · 0.64