MCPcopy
hub / github.com/zer0yu/CyberSecurityRSS / run_sync

Function run_sync

scripts/opml_sync.py:551–662  ·  view source on GitHub ↗
(
    tiny_path: Path,
    full_path: Path,
    mode: str,
    fallback_category: str,
    timeout: float,
    retries: int,
    workers: int,
    state_file: Path,
    delete_threshold: int,
    max_probe_bytes: int,
    checker: Optional[Callable[[str], Any]] = None,
)

Source from the content-addressed store, hash-verified

549
550
551def run_sync(
552 tiny_path: Path,
553 full_path: Path,
554 mode: str,
555 fallback_category: str,
556 timeout: float,
557 retries: int,
558 workers: int,
559 state_file: Path,
560 delete_threshold: int,
561 max_probe_bytes: int,
562 checker: Optional[Callable[[str], Any]] = None,
563) -> Tuple[SyncStats, bool]:
564 tiny_path = Path(tiny_path)
565 full_path = Path(full_path)
566 state_file = Path(state_file)
567
568 if mode not in {"check", "apply"}:
569 raise ValueError(f"Unsupported mode: {mode}")
570
571 tiny_original = tiny_path.read_bytes()
572 full_original = full_path.read_bytes()
573 state_original = state_file.read_bytes() if state_file.exists() else b""
574
575 tiny_tree = ET.parse(tiny_path)
576 full_tree = ET.parse(full_path)
577
578 tiny_body = get_body(tiny_tree, tiny_path)
579 full_body = get_body(full_tree, full_path)
580
581 stats = SyncStats()
582 stats.tiny_links_before = len(collect_rss_urls(tiny_body))
583 stats.full_links_before = len(collect_rss_urls(full_body))
584
585 all_urls = set(collect_rss_urls(tiny_body)) | set(collect_rss_urls(full_body))
586 check_fn = checker or HttpFeedChecker(
587 timeout=timeout,
588 retries=retries,
589 user_agent=DEFAULT_USER_AGENT,
590 max_probe_bytes=max_probe_bytes,
591 )
592 check_results = check_urls_parallel(all_urls, check_fn, workers=workers)
593
594 stats.checked_urls = len(check_results)
595 stats.alive_urls = sum(1 for result in check_results.values() if result.alive)
596 stats.dead_urls = stats.checked_urls - stats.alive_urls
597
598 previous_state = load_health_state(state_file)
599 next_state_payload, removable_urls, hard_fail_urls, transient_fail_urls = build_next_health_state(
600 urls=all_urls,
601 check_results=check_results,
602 previous_state=previous_state,
603 delete_threshold=max(1, delete_threshold),
604 )
605 stats.hard_fail_urls = hard_fail_urls
606 stats.transient_fail_urls = transient_fail_urls
607
608 (

Calls 11

SyncStats · Class · 0.85
collect_rss_urls · Function · 0.85
HttpFeedChecker · Class · 0.85
check_urls_parallel · Function · 0.85
load_health_state · Function · 0.85
build_next_health_state · Function · 0.85
clean_tree · Function · 0.85
sync_tiny_to_full · Function · 0.85
serialize_state · Function · 0.85
get_body · Function · 0.70
serialize_tree · Function · 0.70