| 4 | |
| 5 | |
def test_fetch_html(mocker):
    """Check that FetchNode returns the HTML document produced by the loader.

    ``ChromiumLoader`` is patched inside ``scrapegraphai.nodes.fetch_node`` so
    that no browser is started; its ``load`` method returns a single
    ``Document`` wrapping a small HTML page. The test then verifies that the
    loader was invoked exactly once and that the title, link URL and image
    URL of that page are present in the first document of the ``"doc"``
    output.
    """
    title = "ScrapeGraph AI"
    link_url = "https://github.com/VinciGit00/Scrapegraph-ai"
    img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png"
    # The HTML lines are kept flush-left so the literal carries no extra
    # leading whitespace into the page content.
    content = f"""
<html>
<head>
<title>{title}</title>
</head>
<body>
<a href="{link_url}">ScrapeGraphAI: You Only Scrape Once</a>
<img src="{img_url}" alt="Scrapegraph-ai Logo">
</body>
</html>
"""
    # Replace the loader class as seen by fetch_node; every instance created
    # by the node is then the same mock whose ``load`` returns our page.
    mock_loader_cls = mocker.patch("scrapegraphai.nodes.fetch_node.ChromiumLoader")
    mock_loader = mock_loader_cls.return_value
    mock_loader.load.return_value = [Document(page_content=content)]
    node = FetchNode(
        input="url | local_dir",
        output=["doc", "links", "images"],
        node_config={"headless": False},
    )

    result = node.execute({"url": "https://scrapegraph-ai.com/example"})

    mock_loader.load.assert_called_once()
    # Guard before subscripting: a ``None`` result should fail on this
    # assertion rather than with a TypeError on ``result["doc"]``.
    assert result is not None
    doc = result["doc"][0]
    # Compare against the same values used to build the fixture so the page
    # and the expectations cannot drift apart.
    assert title in doc.page_content
    assert link_url in doc.page_content
    assert img_url in doc.page_content
| 40 | |
| 41 | |
| 42 | def test_fetch_json(): |