(
self,
url_models: List[UrlModel],
provider: str = DEFAULT_PROVIDER,
api_token: str = None,
extract_blocks_flag: bool = True,
word_count_threshold=MIN_WORD_THRESHOLD,
use_cached_html: bool = False,
css_selector: str = None,
screenshot: bool = False,
extraction_strategy: ExtractionStrategy = None,
chunking_strategy: ChunkingStrategy = RegexChunking(),
**kwargs,
)
| 202 | ) |
| 203 | |
def fetch_pages(
    self,
    url_models: List[UrlModel],
    provider: str = DEFAULT_PROVIDER,
    api_token: str = None,
    extract_blocks_flag: bool = True,
    word_count_threshold=MIN_WORD_THRESHOLD,
    use_cached_html: bool = False,
    css_selector: str = None,
    screenshot: bool = False,
    extraction_strategy: ExtractionStrategy = None,
    chunking_strategy: ChunkingStrategy = RegexChunking(),
    **kwargs,
) -> List[CrawlResult]:
    """Fetch several pages concurrently by calling ``self.fetch_page`` per URL model.

    Every entry of ``url_models`` is handed to ``self.fetch_page`` on a
    ``ThreadPoolExecutor`` worker, with the same option values for all of them.

    Args:
        url_models: The URL models to fetch; one ``fetch_page`` call is made
            for each entry.
        provider, api_token, extract_blocks_flag, word_count_threshold,
        use_cached_html, css_selector, screenshot, chunking_strategy:
            Forwarded unchanged to every ``fetch_page`` call.
        extraction_strategy: Forwarded to every ``fetch_page`` call; a falsy
            value is replaced with ``NoExtractionStrategy()``.
        **kwargs: Extra keyword arguments forwarded to every ``fetch_page``
            call.

    Returns:
        The ``fetch_page`` results, in the same order as ``url_models``.

    Raises:
        Whatever ``fetch_page`` raises: ``executor.map`` re-raises a worker's
        exception when its result is collected.
    """
    # NOTE: the ``RegexChunking()`` default is evaluated once, when the method
    # is defined, so calls that rely on the default all share one instance.
    extraction_strategy = extraction_strategy or NoExtractionStrategy()

    def fetch_page_wrapper(url_model):
        # The shared options are bound here instead of being passed through
        # ``executor.map`` as parallel iterables. The previous
        # ``*[kwargs] * len(url_models)`` handed the kwargs dict itself to
        # ``map`` as extra iterables: an empty dict made ``map`` yield nothing,
        # and a non-empty one leaked its keys as positional arguments.
        #
        # Positional order (css_selector, screenshot, use_cached_html) is kept
        # exactly as the original call passed it; it differs from this
        # method's own parameter order and presumably mirrors fetch_page's
        # signature -- TODO confirm.
        return self.fetch_page(
            url_model,
            provider,
            api_token,
            extract_blocks_flag,
            word_count_threshold,
            css_selector,
            screenshot,
            use_cached_html,
            extraction_strategy,
            chunking_strategy,
            **kwargs,
        )

    with ThreadPoolExecutor() as executor:
        # list() drains the lazy map iterator before the pool shuts down.
        return list(executor.map(fetch_page_wrapper, url_models))
| 241 | |
| 242 | def run( |
| 243 | self, |
nothing calls this directly
no test coverage detected