Test cleanup_html with valid HTML containing title, body, links, images, and scripts.
()
| 30 | |
| 31 | |
def test_cleanup_html_success():
    """Exercise cleanup_html on a well-formed page.

    The page carries a title, a paragraph, a relative link, a relative image
    and an inline script; each element of the five-item result is checked.
    """
    # The markup is kept flush-left so the literal matches the text passed in.
    markup = """
<html>
<head>
<title>Test Title</title>
</head>
<body>
<p>Hello World!</p>
<a href="/page">Link</a>
<img src="image.jpg"/>
<script>var info = {"num": 123};</script>
</body>
</html>
"""
    origin = "http://example.com"

    result = cleanup_html(markup, origin)
    page_title, body_markup, links, images, scripts = result

    assert page_title == "Test Title"

    # The minimized body must still carry both of its <body> tags.
    assert "<body>" in body_markup
    assert "</body>" in body_markup

    # The relative href and src are expected back joined onto the base URL.
    assert "http://example.com/page" in links
    assert "http://example.com/image.jpg" in images

    # The inline script should have yielded some extracted output.
    assert "JSON data from script" in scripts
| 59 | |
| 60 | |
| 61 | def test_cleanup_html_no_body(): |
nothing calls this directly
no test coverage detected