Test that lazy_load yields Document objects even if the scraping function returns a non‐string value.
(monkeypatch)
| 1461 | |
def test_lazy_load_non_string_scraper(monkeypatch):
    """Check that lazy_load wraps a non-string scraper result in a Document.

    The loader's ``ascrape_playwright`` coroutine is replaced with a stub that
    returns an integer. The test then asserts that exactly one Document is
    produced, that its ``page_content`` equals that integer, and that its
    ``source`` metadata is one of the requested URLs.

    This is a plain synchronous test: nothing in it is awaited, and
    ``lazy_load`` is consumed with ``list()``, i.e. as a synchronous iterator.
    NOTE(review): presumably ``lazy_load`` drives the scraper coroutine itself
    (e.g. via ``asyncio.run``), which cannot be done from inside an already
    running event loop -- confirm against ChromiumLoader.
    """
    # Local import keeps the test self-contained.
    from langchain_core.documents import Document

    urls = ["http://example.com"]
    loader = ChromiumLoader(urls, backend="playwright", requires_js_support=False)

    async def dummy_non_string(url):
        # Return an integer instead of an HTML string.
        return 12345

    # Patch only this loader instance so no real browser is launched.
    monkeypatch.setattr(loader, "ascrape_playwright", dummy_non_string)

    docs = list(loader.lazy_load())

    # One URL in, one Document out, carrying the stub's value unchanged.
    assert len(docs) == 1
    for doc in docs:
        assert isinstance(doc, Document)
        assert doc.page_content == 12345
        assert doc.metadata["source"] in urls
| 1482 | |
| 1483 | |
| 1484 | @pytest.mark.asyncio |
nothing calls this directly
no test coverage detected