MCPcopy
hub / github.com/unclecode/crawl4ai / test_run_different_strategies

Method test_run_different_strategies

tests/test_web_crawler.py:24–43  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

22 self.assertTrue(result.success, "Failed to crawl and extract using default strategies")
23
def test_run_different_strategies(self):
    """Run the crawler on one page with two chunking/extraction pairings.

    Each pairing is passed to ``self.crawler.run`` together with
    ``word_count_threshold=5`` and ``bypass_cache=True``; the test then
    asserts that the returned result reports ``success``.

    The LLM-based pairing reads its API token from the ``OPENAI_API_KEY``
    environment variable.
    NOTE(review): presumably both runs fetch the live page over the
    network -- confirm before running in an offline CI job.
    """
    target_url = 'https://www.nbcnews.com/business'

    # Pairing 1: fixed-length word chunks + LLM extraction.
    fixed_chunker = FixedLengthWordChunking(chunk_size=100)
    llm_extractor = LLMExtractionStrategy(
        provider="openai/gpt-3.5-turbo",
        api_token=os.getenv('OPENAI_API_KEY'),
    )
    llm_outcome = self.crawler.run(
        url=target_url,
        word_count_threshold=5,
        chunking_strategy=fixed_chunker,
        extraction_strategy=llm_extractor,
        bypass_cache=True,
    )
    self.assertTrue(
        llm_outcome.success,
        "Failed to crawl and extract with FixedLengthWordChunking and LLMExtractionStrategy",
    )

    # Pairing 2: sliding-window chunks + topic extraction.
    window_chunker = SlidingWindowChunking(window_size=100, step=50)
    topic_extractor = TopicExtractionStrategy(num_keywords=5)
    topic_outcome = self.crawler.run(
        url=target_url,
        word_count_threshold=5,
        chunking_strategy=window_chunker,
        extraction_strategy=topic_extractor,
        bypass_cache=True,
    )
    self.assertTrue(
        topic_outcome.success,
        "Failed to crawl and extract with SlidingWindowChunking and TopicExtractionStrategy",
    )
44
45 def test_invalid_url(self):
46 with self.assertRaises(Exception) as context:

Callers

nothing calls this directly

Calls 5

run · Method · 0.45

Tested by

no test coverage detected