Lazily load text content from the provided URLs. This method yields Documents one at a time as they're scraped, instead of waiting to scrape all URLs before returning. Yields: Document: The scraped content encapsulated within a Document object.
(self)
| 444 | return [doc async for doc in self.alazy_load()] |
| 445 | |
def lazy_load(self) -> Iterator[Document]:
    """
    Scrape each configured URL in turn and yield the result as a Document.

    Documents are produced one at a time, as soon as each URL has been
    scraped, rather than after every URL has been processed.

    The scraping coroutine is chosen once, before the first URL is visited:
    ``self.ascrape_with_js_support`` when ``self.requires_js_support`` is
    truthy, otherwise the method named ``ascrape_<backend>`` looked up on
    this instance from ``self.backend``.

    Note:
        Every URL is driven to completion with ``asyncio.run``, which
        cannot be called while another event loop is already running in
        the same thread.

    Yields:
        Document: The scraped content as ``page_content``, with the
        originating URL stored under the ``"source"`` metadata key.
    """
    # Resolve the scraper a single time; it does not change between URLs.
    if self.requires_js_support:
        fetch_page = self.ascrape_with_js_support
    else:
        fetch_page = getattr(self, f"ascrape_{self.backend}")

    for target_url in self.urls:
        # Block until this URL's coroutine finishes before yielding.
        page_content = asyncio.run(fetch_page(target_url))
        yield Document(
            page_content=page_content,
            metadata={"source": target_url},
        )
| 466 | |
| 467 | async def alazy_load(self) -> AsyncIterator[Document]: |
| 468 | """ |