MCPcopy
hub / github.com/unclecode/crawl4ai / fetch_page

Method fetch_page

crawl4ai/web_crawler.py:41–65  ·  view source on GitHub ↗
(
        self,
        url_model: UrlModel,
        provider: str = DEFAULT_PROVIDER,
        api_token: str = None,
        extract_blocks_flag: bool = True,
        word_count_threshold=MIN_WORD_THRESHOLD,
        css_selector: str = None,
        screenshot: bool = False,
        use_cached_html: bool = False,
        extraction_strategy: ExtractionStrategy = None,
        chunking_strategy: ChunkingStrategy = RegexChunking(),
        **kwargs,
    )

Source from the content-addressed store, hash-verified

39 print("[LOG] 🌞 WebCrawler is ready to crawl")
40
def fetch_page(
    self,
    url_model: UrlModel,
    provider: str = DEFAULT_PROVIDER,
    api_token: str = None,
    extract_blocks_flag: bool = True,
    word_count_threshold=MIN_WORD_THRESHOLD,
    css_selector: str = None,
    screenshot: bool = False,
    use_cached_html: bool = False,
    extraction_strategy: ExtractionStrategy = None,
    chunking_strategy: ChunkingStrategy = RegexChunking(),
    **kwargs,
) -> CrawlResult:
    """Crawl the URL described by ``url_model`` by delegating to ``self.run``.

    Args:
        url_model: Supplies the target ``url`` and the ``forced`` flag, which
            is forwarded to ``run`` as ``bypass_cache``.
        provider: Accepted but not read by this method.
        api_token: Accepted but not read by this method.
        extract_blocks_flag: Accepted but not read by this method.
        word_count_threshold: Forwarded positionally to ``run``.
        css_selector: Forwarded to ``run`` as ``css_selector``.
        screenshot: Forwarded to ``run`` as ``screenshot``.
        use_cached_html: Accepted but not read by this method.
        extraction_strategy: Forwarded positionally to ``run``; a falsy value
            (including the default ``None``) is replaced by a new
            ``NoExtractionStrategy()``.
        chunking_strategy: Forwarded positionally to ``run``.
        **kwargs: Passed through to ``run`` unchanged.

    Returns:
        Whatever ``self.run`` returns for this URL.
    """
    # NOTE: the ``RegexChunking()`` default above is built once, when the
    # function is defined, so every call relying on the default shares that
    # single instance.
    return self.run(
        url_model.url,
        word_count_threshold,
        extraction_strategy or NoExtractionStrategy(),
        chunking_strategy,
        bypass_cache=url_model.forced,
        css_selector=css_selector,
        screenshot=screenshot,
        **kwargs,
    )
66
67 def fetch_pages(
68 self,

Callers 1

fetch_page_wrapper · Method · 0.95

Calls 3

run · Method · 0.95
RegexChunking · Class · 0.85

Tested by

no test coverage detected