Import any new links from subscriptions and retry any previously failed/skipped links
(resume: Optional[float]=None,
only_new: bool=ONLY_NEW,
index_only: bool=False,
overwrite: bool=False,
filter_patterns_str: Optional[str]=None,
filter_patterns: Optional[List[str]]=None,
filter_type: Optional[str]=None,
status: Optional[str]=None,
after: Optional[str]=None,
before: Optional[str]=None,
extractors: str="",
out_dir: Path=OUTPUT_DIR)
| 778 | |
| 779 | @enforce_types |
| 780 | def update(resume: Optional[float]=None, |
| 781 | only_new: bool=ONLY_NEW, |
| 782 | index_only: bool=False, |
| 783 | overwrite: bool=False, |
| 784 | filter_patterns_str: Optional[str]=None, |
| 785 | filter_patterns: Optional[List[str]]=None, |
| 786 | filter_type: Optional[str]=None, |
| 787 | status: Optional[str]=None, |
| 788 | after: Optional[str]=None, |
| 789 | before: Optional[str]=None, |
| 790 | extractors: str="", |
| 791 | out_dir: Path=OUTPUT_DIR) -> List[Link]: |
| 792 | """Import any new links from subscriptions and retry any previously failed/skipped links""" |
| 793 | |
| 794 | check_data_folder(out_dir=out_dir) |
| 795 | check_dependencies() |
| 796 | new_links: List[Link] = [] # TODO: Remove input argument: only_new |
| 797 | |
| 798 | extractors = extractors.split(",") if extractors else [] |
| 799 | |
| 800 | # Step 1: Filter for selected_links |
| 801 | matching_snapshots = list_links( |
| 802 | filter_patterns=filter_patterns, |
| 803 | filter_type=filter_type, |
| 804 | before=before, |
| 805 | after=after, |
| 806 | ) |
| 807 | |
| 808 | matching_folders = list_folders( |
| 809 | links=matching_snapshots, |
| 810 | status=status, |
| 811 | out_dir=out_dir, |
| 812 | ) |
| 813 | all_links = [link for link in matching_folders.values() if link] |
| 814 | |
| 815 | if index_only: |
| 816 | for link in all_links: |
| 817 | write_link_details(link, out_dir=out_dir, skip_sql_index=True) |
| 818 | index_links(all_links, out_dir=out_dir) |
| 819 | return all_links |
| 820 | |
| 821 | # Step 2: Run the archive methods for each link |
| 822 | to_archive = new_links if only_new else all_links |
| 823 | if resume: |
| 824 | to_archive = [ |
| 825 | link for link in to_archive |
| 826 | if link.timestamp >= str(resume) |
| 827 | ] |
| 828 | if not to_archive: |
| 829 | stderr('') |
| 830 | stderr(f'[√] Nothing found to resume after {resume}', color='green') |
| 831 | return all_links |
| 832 | |
| 833 | archive_kwargs = { |
| 834 | "out_dir": out_dir, |
| 835 | } |
| 836 | if extractors: |
| 837 | archive_kwargs["methods"] = extractors |
no test coverage detected