MCPcopy
hub / github.com/firecrawl/firecrawl / batch_scrape

Method batch_scrape

apps/python-sdk/firecrawl/v2/client.py:1516–1591  ·  view source on GitHub ↗

Start a batch scrape job and wait until completion.

(
        self,
        urls: List[str],
        *,
        formats: Optional[List['FormatOption']] = None,
        headers: Optional[Dict[str, str]] = None,
        include_tags: Optional[List[str]] = None,
        exclude_tags: Optional[List[str]] = None,
        only_main_content: Optional[bool] = None,
        timeout: Optional[int] = None,
        wait_for: Optional[int] = None,
        mobile: Optional[bool] = None,
        parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
        actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
        location: Optional['Location'] = None,
        skip_tls_verification: Optional[bool] = None,
        remove_base64_images: Optional[bool] = None,
        fast_mode: Optional[bool] = None,
        use_mock: Optional[str] = None,
        block_ads: Optional[bool] = None,
        proxy: Optional[str] = None,
        max_age: Optional[int] = None,
        store_in_cache: Optional[bool] = None,
        lockdown: Optional[bool] = None,
        webhook: Optional[Union[str, WebhookConfig]] = None,
        append_to_id: Optional[str] = None,
        ignore_invalid_urls: Optional[bool] = None,
        max_concurrency: Optional[int] = None,
        zero_data_retention: Optional[bool] = None,
        integration: Optional[str] = None,
        idempotency_key: Optional[str] = None,
        poll_interval: int = 2,
        wait_timeout: Optional[int] = None,
    )

Source from the content-addressed store, hash-verified

1514 return Watcher(self, job_id, kind=kind, poll_interval=poll_interval, timeout=timeout)
1515
1516 def batch_scrape(
1517 self,
1518 urls: List[str],
1519 *,
1520 formats: Optional[List['FormatOption']] = None,
1521 headers: Optional[Dict[str, str]] = None,
1522 include_tags: Optional[List[str]] = None,
1523 exclude_tags: Optional[List[str]] = None,
1524 only_main_content: Optional[bool] = None,
1525 timeout: Optional[int] = None,
1526 wait_for: Optional[int] = None,
1527 mobile: Optional[bool] = None,
1528 parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
1529 actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
1530 location: Optional['Location'] = None,
1531 skip_tls_verification: Optional[bool] = None,
1532 remove_base64_images: Optional[bool] = None,
1533 fast_mode: Optional[bool] = None,
1534 use_mock: Optional[str] = None,
1535 block_ads: Optional[bool] = None,
1536 proxy: Optional[str] = None,
1537 max_age: Optional[int] = None,
1538 store_in_cache: Optional[bool] = None,
1539 lockdown: Optional[bool] = None,
1540 webhook: Optional[Union[str, WebhookConfig]] = None,
1541 append_to_id: Optional[str] = None,
1542 ignore_invalid_urls: Optional[bool] = None,
1543 max_concurrency: Optional[int] = None,
1544 zero_data_retention: Optional[bool] = None,
1545 integration: Optional[str] = None,
1546 idempotency_key: Optional[str] = None,
1547 poll_interval: int = 2,
1548 wait_timeout: Optional[int] = None,
1549 ):
1550 """
1551 Start a batch scrape job and wait until completion.
1552 """
1553 options = ScrapeOptions(
1554 **{k: v for k, v in dict(
1555 formats=formats,
1556 headers=headers,
1557 include_tags=include_tags,
1558 exclude_tags=exclude_tags,
1559 only_main_content=only_main_content,
1560 timeout=timeout,
1561 wait_for=wait_for,
1562 mobile=mobile,
1563 parsers=parsers,
1564 actions=actions,
1565 location=location,
1566 skip_tls_verification=skip_tls_verification,
1567 remove_base64_images=remove_base64_images,
1568 fast_mode=fast_mode,
1569 use_mock=use_mock,
1570 block_ads=block_ads,
1571 proxy=proxy,
1572 max_age=max_age,
1573 store_in_cache=store_in_cache,

Callers 8

batch_scrape_urls (Method) · 0.95
main (Function) · 0.45
main (Function) · 0.45

Calls 1

ScrapeOptions (Class) · 0.70