(crawl_request: CrawlRequest, request: Request)
@app.post("/crawl")
@limiter.limit(get_rate_limit())
async def crawl_urls(crawl_request: CrawlRequest, request: Request):
    """Crawl every URL in ``crawl_request.urls`` and return the serialized results.

    The handler first reserves one of the ``MAX_CONCURRENT_REQUESTS`` slots
    (tracked in the module-level ``current_requests`` counter, guarded by
    ``lock``), then runs one crawler call per URL on a thread pool and waits
    for all of them.

    Args:
        crawl_request: Request payload; supplies the URLs, the extraction and
            chunking strategy names/arguments and the crawler options that
            are forwarded positionally to ``get_crawler().run``.
        request: Incoming HTTP request. Not read in this body; presumably
            required by the ``limiter.limit`` decorator -- TODO confirm.

    Returns:
        ``{"results": [...]}`` with one ``model_dump()`` dict per URL, in the
        same order as ``crawl_request.urls``. Each result's ``html`` is set
        to ``None`` when ``crawl_request.include_raw_html`` is falsy.

    Raises:
        HTTPException: 429 when ``MAX_CONCURRENT_REQUESTS`` requests are
            already in flight.
        Exception: whatever the first failing crawler call raised.
    """
    logging.debug(f"[LOG] Crawl request for URL: {crawl_request.urls}")
    global current_requests
    async with lock:
        # Reject before incrementing so a refused request never holds a slot.
        if current_requests >= MAX_CONCURRENT_REQUESTS:
            raise HTTPException(status_code=429, detail="Too many requests - please try again later.")
        current_requests += 1

    try:
        logging.debug("[LOG] Loading extraction and chunking strategies...")
        # Strategies are always built in verbose mode, regardless of what the
        # caller passed in the argument dicts.
        crawl_request.extraction_strategy_args['verbose'] = True
        crawl_request.chunking_strategy_args['verbose'] = True

        extraction_strategy = import_strategy(
            "crawl4ai.extraction_strategy",
            crawl_request.extraction_strategy,
            **crawl_request.extraction_strategy_args,
        )
        chunking_strategy = import_strategy(
            "crawl4ai.chunking_strategy",
            crawl_request.chunking_strategy,
            **crawl_request.chunking_strategy_args,
        )

        # Use ThreadPoolExecutor to run the synchronous WebCrawler in async manner
        logging.debug("[LOG] Running the WebCrawler...")
        with ThreadPoolExecutor() as executor:
            # We are inside a coroutine, so a loop is guaranteed to be running;
            # get_running_loop() is the preferred accessor in that situation.
            loop = asyncio.get_running_loop()
            futures = [
                loop.run_in_executor(
                    executor,
                    get_crawler().run,
                    str(url),
                    crawl_request.word_count_threshold,
                    extraction_strategy,
                    chunking_strategy,
                    crawl_request.bypass_cache,
                    crawl_request.css_selector,
                    crawl_request.screenshot,
                    crawl_request.user_agent,
                    crawl_request.verbose,
                )
                for url in crawl_request.urls
            ]
            try:
                results = await asyncio.gather(*futures)
            except Exception:
                # gather() re-raises on the first failure while the other
                # crawls keep running in their threads. Leaving the `with`
                # block now would call executor.shutdown(wait=True) on the
                # event-loop thread and stall every other request until they
                # finish, so wait for them cooperatively first.
                await asyncio.wait(futures)
                raise

        # if include_raw_html is False, remove the raw HTML content from the results
        if not crawl_request.include_raw_html:
            for result in results:
                result.html = None

        return {"results": [result.model_dump() for result in results]}
    finally:
        # Always release the slot reserved above, on success or failure.
        async with lock:
            current_requests -= 1
| 240 | |
| 241 | @app.get("/strategies/extraction", response_class=JSONResponse) |
| 242 | async def get_extraction_strategies(): |
nothing calls this directly
no test coverage detected
searching dependent graphs…