Start a batch scrape job and wait until completion.
(
self,
urls: List[str],
*,
formats: Optional[List['FormatOption']] = None,
headers: Optional[Dict[str, str]] = None,
include_tags: Optional[List[str]] = None,
exclude_tags: Optional[List[str]] = None,
only_main_content: Optional[bool] = None,
timeout: Optional[int] = None,
wait_for: Optional[int] = None,
mobile: Optional[bool] = None,
parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None,
actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None,
location: Optional['Location'] = None,
skip_tls_verification: Optional[bool] = None,
remove_base64_images: Optional[bool] = None,
fast_mode: Optional[bool] = None,
use_mock: Optional[str] = None,
block_ads: Optional[bool] = None,
proxy: Optional[str] = None,
max_age: Optional[int] = None,
store_in_cache: Optional[bool] = None,
lockdown: Optional[bool] = None,
webhook: Optional[Union[str, WebhookConfig]] = None,
append_to_id: Optional[str] = None,
ignore_invalid_urls: Optional[bool] = None,
max_concurrency: Optional[int] = None,
zero_data_retention: Optional[bool] = None,
integration: Optional[str] = None,
idempotency_key: Optional[str] = None,
poll_interval: int = 2,
wait_timeout: Optional[int] = None,
)
| 1514 | return Watcher(self, job_id, kind=kind, poll_interval=poll_interval, timeout=timeout) |
| 1515 | |
| 1516 | def batch_scrape( |
| 1517 | self, |
| 1518 | urls: List[str], |
| 1519 | *, |
| 1520 | formats: Optional[List['FormatOption']] = None, |
| 1521 | headers: Optional[Dict[str, str]] = None, |
| 1522 | include_tags: Optional[List[str]] = None, |
| 1523 | exclude_tags: Optional[List[str]] = None, |
| 1524 | only_main_content: Optional[bool] = None, |
| 1525 | timeout: Optional[int] = None, |
| 1526 | wait_for: Optional[int] = None, |
| 1527 | mobile: Optional[bool] = None, |
| 1528 | parsers: Optional[Union[List[str], List[Union[str, PDFParser]]]] = None, |
| 1529 | actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None, |
| 1530 | location: Optional['Location'] = None, |
| 1531 | skip_tls_verification: Optional[bool] = None, |
| 1532 | remove_base64_images: Optional[bool] = None, |
| 1533 | fast_mode: Optional[bool] = None, |
| 1534 | use_mock: Optional[str] = None, |
| 1535 | block_ads: Optional[bool] = None, |
| 1536 | proxy: Optional[str] = None, |
| 1537 | max_age: Optional[int] = None, |
| 1538 | store_in_cache: Optional[bool] = None, |
| 1539 | lockdown: Optional[bool] = None, |
| 1540 | webhook: Optional[Union[str, WebhookConfig]] = None, |
| 1541 | append_to_id: Optional[str] = None, |
| 1542 | ignore_invalid_urls: Optional[bool] = None, |
| 1543 | max_concurrency: Optional[int] = None, |
| 1544 | zero_data_retention: Optional[bool] = None, |
| 1545 | integration: Optional[str] = None, |
| 1546 | idempotency_key: Optional[str] = None, |
| 1547 | poll_interval: int = 2, |
| 1548 | wait_timeout: Optional[int] = None, |
| 1549 | ): |
| 1550 | """ |
| 1551 | Start a batch scrape job and wait until completion. |
| 1552 | """ |
| 1553 | options = ScrapeOptions( |
| 1554 | **{k: v for k, v in dict( |
| 1555 | formats=formats, |
| 1556 | headers=headers, |
| 1557 | include_tags=include_tags, |
| 1558 | exclude_tags=exclude_tags, |
| 1559 | only_main_content=only_main_content, |
| 1560 | timeout=timeout, |
| 1561 | wait_for=wait_for, |
| 1562 | mobile=mobile, |
| 1563 | parsers=parsers, |
| 1564 | actions=actions, |
| 1565 | location=location, |
| 1566 | skip_tls_verification=skip_tls_verification, |
| 1567 | remove_base64_images=remove_base64_images, |
| 1568 | fast_mode=fast_mode, |
| 1569 | use_mock=use_mock, |
| 1570 | block_ads=block_ads, |
| 1571 | proxy=proxy, |
| 1572 | max_age=max_age, |
| 1573 | store_in_cache=store_in_cache, |