(
self,
url_models: List[UrlModel],
provider: str = DEFAULT_PROVIDER,
api_token: str = None,
extract_blocks_flag: bool = True,
word_count_threshold=MIN_WORD_THRESHOLD,
use_cached_html: bool = False,
css_selector: str = None,
screenshot: bool = False,
extraction_strategy: ExtractionStrategy = None,
chunking_strategy: ChunkingStrategy = RegexChunking(),
**kwargs,
)
| 65 | pass |
| 66 | |
def fetch_pages(
    self,
    url_models: List[UrlModel],
    provider: str = DEFAULT_PROVIDER,
    api_token: str = None,
    extract_blocks_flag: bool = True,
    word_count_threshold=MIN_WORD_THRESHOLD,
    use_cached_html: bool = False,
    css_selector: str = None,
    screenshot: bool = False,
    extraction_strategy: ExtractionStrategy = None,
    chunking_strategy: ChunkingStrategy = RegexChunking(),
    **kwargs,
) -> List[CrawlResult]:
    """Fetch every entry of ``url_models`` concurrently via ``self.fetch_page``.

    Each URL model is submitted to a ``ThreadPoolExecutor`` and handed to
    ``self.fetch_page`` together with the same set of shared options.

    Args:
        url_models: The URL models to fetch; one ``fetch_page`` call is
            made per entry.
        provider: Forwarded unchanged to ``fetch_page``.
        api_token: Forwarded unchanged to ``fetch_page``.
        extract_blocks_flag: Forwarded unchanged to ``fetch_page``.
        word_count_threshold: Forwarded unchanged to ``fetch_page``.
        use_cached_html: Forwarded unchanged to ``fetch_page``.
        css_selector: Forwarded unchanged to ``fetch_page``.
        screenshot: Forwarded unchanged to ``fetch_page``.
        extraction_strategy: Forwarded to ``fetch_page``; a falsy value is
            replaced by a single ``NoExtractionStrategy()`` instance that
            is shared by all calls.
        chunking_strategy: Forwarded unchanged to ``fetch_page``.
        **kwargs: Extra keyword arguments forwarded to every
            ``fetch_page`` call.

    Returns:
        The ``fetch_page`` results, in the same order as ``url_models``
        (``Executor.map`` yields results in input order).

    Raises:
        Any exception raised by a ``fetch_page`` call propagates when its
        result is collected from the executor.
    """
    extraction_strategy = extraction_strategy or NoExtractionStrategy()

    def fetch_page_wrapper(url_model):
        # The shared options are bound here rather than passed as parallel
        # iterables to executor.map. In particular **kwargs must be forwarded
        # as real keyword arguments: handing the dict(s) to map() would
        # iterate their keys, and an empty dict would cut the zipped
        # argument stream down to zero calls.
        #
        # NOTE(review): the positional order below (css_selector, screenshot,
        # use_cached_html) is kept exactly as the previous call site passed
        # it and differs from this method's own parameter order -- confirm
        # it matches fetch_page's signature.
        return self.fetch_page(
            url_model,
            provider,
            api_token,
            extract_blocks_flag,
            word_count_threshold,
            css_selector,
            screenshot,
            use_cached_html,
            extraction_strategy,
            chunking_strategy,
            **kwargs,
        )

    with ThreadPoolExecutor() as executor:
        # list() drains the lazy map iterator before the pool shuts down.
        results = list(executor.map(fetch_page_wrapper, url_models))

    return results
| 104 | |
| 105 | def run( |
| 106 | self, |
nothing calls this directly
no test coverage detected