MCPcopy
hub / github.com/ArchiveBox/ArchiveBox / parse_links_from_source

Function parse_links_from_source

archivebox/index/__init__.py:271–285  ·  view source on GitHub ↗
(source_path: str, root_url: Optional[str]=None, parser: str="auto")

Source from the content-addressed store, hash-verified

269
@enforce_types
def parse_links_from_source(source_path: str, root_url: Optional[str]=None, parser: str="auto") -> List[Link]:
    """Parse the import file at ``source_path`` and return its validated links.

    Args:
        source_path: Source file location, passed straight to ``parse_links``.
        root_url: Forwarded unchanged to ``parse_links``.
        parser: Parser name forwarded to ``parse_links`` (defaults to "auto").

    Returns:
        The links produced by running ``validate_links`` over the parsed links.
    """
    # Imported at call time rather than module level (presumably to avoid a
    # circular import with ..parsers -- confirm before hoisting).
    from ..parsers import parse_links

    # parse and validate the import file
    raw_links, parser_name = parse_links(source_path, root_url=root_url, parser=parser)
    new_links: List[Link] = validate_links(raw_links)

    # Only report parsing stats when parse_links returned a truthy parser name.
    if parser_name:
        log_parsing_finished(len(raw_links), parser_name)

    return new_links
286
287@enforce_types
288def fix_duplicate_links_in_index(snapshots: QuerySet, links: Iterable[Link]) -> Iterable[Link]:

Callers 1

add · Function · 0.85

Calls 3

parse_links · Function · 0.85
validate_links · Function · 0.85
log_parsing_finished · Function · 0.85

Tested by

no test coverage detected