MCPcopy
hub / github.com/zer0yu/CyberSecurityRSS / clean_tree

Function clean_tree

scripts/opml_sync.py:375–424  ·  view source on GitHub ↗
(
    tree: ET.ElementTree,
    path: Path,
    check_results: Dict[str, FeedCheckResult],
    removable_urls: Set[str],
)

Source from the content-addressed store, hash-verified

373
374
def clean_tree(
    tree: ET.ElementTree,
    path: Path,
    check_results: Dict[str, FeedCheckResult],
    removable_urls: Set[str],
) -> Tuple[int, int, int]:
    """Prune dead and duplicate feed outlines from an OPML tree, in place.

    Walks every ``outline`` element under the body returned by
    ``get_body(tree, path)``. Outlines that ``is_rss_outline`` accepts are
    treated as feeds; every other ``outline`` is descended into recursively
    (such container outlines are never removed here, even if they end up
    empty). Elements with any other tag are left untouched.

    For each feed outline, in order:

    1. ``xmlUrl`` is passed through ``normalize_url``. A falsy result removes
       the outline and counts it as dead; otherwise the attribute is rewritten
       when normalization changed it.
    2. If the check result says the feed is not alive *and* the URL is in
       ``removable_urls``, the outline is removed and counted as dead.
    3. If the normalized URL was already kept earlier in the walk, the outline
       is removed and counted as a duplicate.
    4. Otherwise the outline is kept; if its check result is not alive it is
       counted as a retained failure.

    Args:
        tree: Parsed OPML document; modified in place.
        path: Forwarded to ``get_body`` together with ``tree``.
        check_results: Check outcome keyed by normalized feed URL. URLs with
            no entry are treated as not alive (``missing_check_result``).
        removable_urls: Normalized URLs whose failing outlines may be deleted.

    Returns:
        ``(dead_removed, dup_removed, retained_failed)``. ``retained_failed``
        counts only outlines that remain in the tree, so a failing feed that
        is dropped as a duplicate contributes to ``dup_removed`` alone.
    """
    body = get_body(tree, path)
    seen_urls: Set[str] = set()
    dead_removed = 0
    dup_removed = 0
    retained_failed = 0

    # Fallback for URLs that were never checked: reported as failed, but only
    # removed when the URL is also listed in removable_urls.
    missing_result = FeedCheckResult(
        alive=False,
        kind="transient_fail",
        reason="missing_check_result",
    )

    def visit(parent: ET.Element) -> None:
        nonlocal dead_removed, dup_removed, retained_failed
        # Iterate over a snapshot because children are removed from `parent`
        # while walking.
        for child in list(parent):
            if child.tag != "outline":
                continue
            if not is_rss_outline(child):
                # Non-feed outline: descend into its children.
                visit(child)
                continue

            raw = child.attrib.get("xmlUrl", "")
            url = normalize_url(raw)
            if not url:
                parent.remove(child)
                dead_removed += 1
                continue
            if raw != url:
                # Persist the normalized form so the written file matches the
                # keys used for de-duplication and check lookups.
                child.attrib["xmlUrl"] = url

            result = check_results.get(url, missing_result)
            if not result.alive and url in removable_urls:
                parent.remove(child)
                dead_removed += 1
                continue

            if url in seen_urls:
                parent.remove(child)
                dup_removed += 1
                continue
            seen_urls.add(url)

            # Counted only after de-duplication: an entry removed as a
            # duplicate is not "retained", so it must not inflate this total.
            if not result.alive:
                retained_failed += 1

    visit(body)
    return dead_removed, dup_removed, retained_failed
425
426
427def top_level_categories(body: ET.Element) -> List[ET.Element]:

Callers 1

run_sync · Function · 0.85

Calls 2

visit · Function · 0.85
get_body · Function · 0.70

Tested by

no test coverage detected