hub / github.com/unclecode/crawl4ai / test_run_different_strategies

Method test_run_different_strategies

tests/test_web_crawler.py:24–43 · view source on GitHub ↗

(self)

Source from the content-addressed store, hash-verified

22	self.assertTrue(result.success, "Failed to crawl and extract using default strategies")
23
def test_run_different_strategies(self):
    """Crawl one URL under two chunking/extraction pairings.

    Both runs bypass the cache, and each must report ``result.success``.
    The pairings are exercised one after the other, so the second is only
    set up once the first assertion has passed.
    """
    # Keyword arguments common to both crawler invocations.
    shared_options = {
        'url': 'https://www.nbcnews.com/business',
        'word_count_threshold': 5,
    }

    # Pairing 1: fixed-length word chunks fed to the LLM extractor.
    # The API token is read from the OPENAI_API_KEY environment variable.
    llm_result = self.crawler.run(
        **shared_options,
        chunking_strategy=FixedLengthWordChunking(chunk_size=100),
        extraction_strategy=LLMExtractionStrategy(
            provider="openai/gpt-3.5-turbo",
            api_token=os.getenv('OPENAI_API_KEY'),
        ),
        bypass_cache=True,
    )
    self.assertTrue(
        llm_result.success,
        "Failed to crawl and extract with FixedLengthWordChunking and LLMExtractionStrategy",
    )

    # Pairing 2: overlapping sliding windows fed to the topic extractor.
    topic_result = self.crawler.run(
        **shared_options,
        chunking_strategy=SlidingWindowChunking(window_size=100, step=50),
        extraction_strategy=TopicExtractionStrategy(num_keywords=5),
        bypass_cache=True,
    )
    self.assertTrue(
        topic_result.success,
        "Failed to crawl and extract with SlidingWindowChunking and TopicExtractionStrategy",
    )
44
45	def test_invalid_url(self):
46	with self.assertRaises(Exception) as context:

nothing calls this directly

FixedLengthWordChunking (Class) · 0.90

LLMExtractionStrategy (Class) · 0.90

SlidingWindowChunking (Class) · 0.90

TopicExtractionStrategy (Class) · 0.90

run (Method) · 0.45

no test coverage detected