(self)
| 70 | self.assertTrue(result.success, "Failed to bypass cache and fetch fresh data") |
| 71 | |
def test_fetch_multiple_pages(self):
    """Crawl several URLs one after another and check each crawl succeeds.

    Every URL is fetched through ``self.crawler.run`` with
    ``bypass_cache=True``, a ``RegexChunking`` chunker and a
    ``CosineStrategy`` extractor. One result per URL is expected, and
    each result's ``success`` attribute must be truthy.
    """
    urls = [
        'https://www.nbcnews.com/business',
        'https://www.bbc.com/news',
    ]
    results = [
        self.crawler.run(
            url=url,
            word_count_threshold=5,
            chunking_strategy=RegexChunking(),
            extraction_strategy=CosineStrategy(),
            bypass_cache=True,
        )
        for url in urls
    ]

    # Compare against len(urls) instead of a literal so the check stays
    # correct when URLs are added to or removed from the list above.
    self.assertEqual(len(results), len(urls), "Failed to crawl and extract multiple pages")
    for url, result in zip(urls, results):
        # subTest labels a failure with the URL that produced it and lets
        # the remaining URLs still be checked.
        with self.subTest(url=url):
            self.assertTrue(result.success, "Failed to crawl and extract a page in the list")
| 91 | |
| 92 | def test_run_fixed_length_word_chunking_and_no_extraction(self): |
| 93 | result = self.crawler.run( |
nothing calls this directly
no test coverage detected