| 549 | |
| 550 | |
| 551 | def run_sync( |
| 552 | tiny_path: Path, |
| 553 | full_path: Path, |
| 554 | mode: str, |
| 555 | fallback_category: str, |
| 556 | timeout: float, |
| 557 | retries: int, |
| 558 | workers: int, |
| 559 | state_file: Path, |
| 560 | delete_threshold: int, |
| 561 | max_probe_bytes: int, |
| 562 | checker: Optional[Callable[[str], Any]] = None, |
| 563 | ) -> Tuple[SyncStats, bool]: |
| 564 | tiny_path = Path(tiny_path) |
| 565 | full_path = Path(full_path) |
| 566 | state_file = Path(state_file) |
| 567 | |
| 568 | if mode not in {"check", "apply"}: |
| 569 | raise ValueError(f"Unsupported mode: {mode}") |
| 570 | |
| 571 | tiny_original = tiny_path.read_bytes() |
| 572 | full_original = full_path.read_bytes() |
| 573 | state_original = state_file.read_bytes() if state_file.exists() else b"" |
| 574 | |
| 575 | tiny_tree = ET.parse(tiny_path) |
| 576 | full_tree = ET.parse(full_path) |
| 577 | |
| 578 | tiny_body = get_body(tiny_tree, tiny_path) |
| 579 | full_body = get_body(full_tree, full_path) |
| 580 | |
| 581 | stats = SyncStats() |
| 582 | stats.tiny_links_before = len(collect_rss_urls(tiny_body)) |
| 583 | stats.full_links_before = len(collect_rss_urls(full_body)) |
| 584 | |
| 585 | all_urls = set(collect_rss_urls(tiny_body)) | set(collect_rss_urls(full_body)) |
| 586 | check_fn = checker or HttpFeedChecker( |
| 587 | timeout=timeout, |
| 588 | retries=retries, |
| 589 | user_agent=DEFAULT_USER_AGENT, |
| 590 | max_probe_bytes=max_probe_bytes, |
| 591 | ) |
| 592 | check_results = check_urls_parallel(all_urls, check_fn, workers=workers) |
| 593 | |
| 594 | stats.checked_urls = len(check_results) |
| 595 | stats.alive_urls = sum(1 for result in check_results.values() if result.alive) |
| 596 | stats.dead_urls = stats.checked_urls - stats.alive_urls |
| 597 | |
| 598 | previous_state = load_health_state(state_file) |
| 599 | next_state_payload, removable_urls, hard_fail_urls, transient_fail_urls = build_next_health_state( |
| 600 | urls=all_urls, |
| 601 | check_results=check_results, |
| 602 | previous_state=previous_state, |
| 603 | delete_threshold=max(1, delete_threshold), |
| 604 | ) |
| 605 | stats.hard_fail_urls = hard_fail_urls |
| 606 | stats.transient_fail_urls = transient_fail_urls |
| 607 | |
| 608 | ( |