Remove chrome://, about://, or other schemed links that can't be archived.
(links: Iterable[Link])
| 134 | |
@enforce_types
def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
    """Lazily filter ``links`` down to the ones whose URL can be archived.

    A link is skipped when any of the following holds (checked in order):

    * ``urlparse`` raises ``ValueError`` for its URL;
    * ``scheme(url)`` is not one of ``http``, ``https`` or ``ftp`` (this is
      what drops chrome://, about:// and similar links);
    * ``URL_DENYLIST_PTN`` is truthy and matches somewhere in the URL;
    * ``URL_ALLOWLIST_PTN`` is truthy and matches nowhere in the URL.

    Every other link is yielded as-is, in the order it was received.
    """
    for candidate in links:
        url = candidate.url

        # Parsed only to weed out malformed URLs; the result itself is unused.
        try:
            urlparse(url)
        except ValueError:
            continue

        if scheme(url) not in ('http', 'https', 'ftp'):
            continue

        # The denylist is consulted before the allowlist.
        if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(url):
            continue

        # With no allowlist configured, everything that got this far passes.
        if not URL_ALLOWLIST_PTN or URL_ALLOWLIST_PTN.search(url):
            yield candidate
| 151 | |
| 152 | |
| 153 | @enforce_types |