MCPcopy
hub / github.com/hesreallyhim/awesome-claude-code / validate_url

Function validate_url

scripts/validation/validate_links.py:662–743  ·  view source on GitHub ↗

Validate a URL with exponential backoff retry logic. Returns (is_valid, status_code, license_info, last_modified).

(
    url: str, max_retries: int = 5
)

Source retrieved from the content-addressed store and hash-verified

660
661
662def validate_url(
663 url: str, max_retries: int = 5
664) -> tuple[bool, int | str | None, str | None, str | None]:
665 """
666 Validate a URL with exponential backoff retry logic.
667 Returns (is_valid, status_code, license_info, last_modified).
668 """
669 if not url or url.strip() == "":
670 return True, None, None, None # Empty URLs are considered valid
671
672 # Convert GitHub URLs to API endpoints
673 api_url, is_github, owner, repo = parse_github_url(url)
674
675 for attempt in range(max_retries):
676 try:
677 if is_github:
678 status, headers, data = github_request_json_paced(api_url)
679 else:
680 response = requests.head(url, headers=HEADERS, timeout=10, allow_redirects=True)
681 status = response.status_code
682 headers = dict(response.headers)
683 data = None
684
685 if is_github and VERBOSE:
686 print(f"[github] url={url} api={api_url} status={status}")
687 print(f"[github-body] {data}")
688
689 # Check if we hit GitHub rate limit
690 if status == 403 and is_github and "X-RateLimit-Remaining" in headers:
691 remaining = _header_int(headers, "X-RateLimit-Remaining")
692 if remaining == 0:
693 reset_time = _header_int(headers, "X-RateLimit-Reset")
694 sleep_time = max(reset_time - int(time.time()), 0) + 1
695 print(f"GitHub rate limit hit. Sleeping for {sleep_time} seconds...")
696 time.sleep(sleep_time)
697 continue
698
699 # Success cases
700 if status < 400:
701 license_info = None
702 last_modified = None
703 if is_github and status == 200:
704 # Extract owner/repo/path from original URL
705 # Try to match file URL first
706 file_match = re.match(
707 r"https://github\.com/([^/]+)/([^/]+)/blob/[^/]+/(.+)", url
708 )
709 if file_match:
710 owner, repo, path = file_match.groups()
711 license_info = get_github_license(owner, repo)
712 last_modified = get_github_last_modified(owner, repo, path)
713 else:
714 # Try repository URL
715 repo_match = re.match(r"https://github\.com/([^/]+)/([^/]+)", url)
716 if repo_match:
717 owner, repo = repo_match.groups()
718 license_info = get_github_license(owner, repo)
719 last_modified = get_github_last_modified(owner, repo)

Callers 2

validate_single_resourceFunction · 0.90
validate_linksFunction · 0.85

Calls 5

parse_github_urlFunction · 0.90
_header_intFunction · 0.85
get_github_licenseFunction · 0.85
get_github_last_modifiedFunction · 0.85

Tested by

no test coverage detected