MCPcopy
hub / github.com/Fosowl/agenticSeek / is_link_valid

Method is_link_valid

sources/browser.py:436–452  ·  view source on GitHub ↗

Check if a URL is a valid link (page, not related to icon or metadata).

(self, url:str)

Source from the content-addressed store, hash-verified

434 return base_url
435
def is_link_valid(self, url:str) -> bool:
    """Check if a URL is a valid link (page, not related to icon or metadata).

    A URL is rejected when any of the following holds:
      * it is longer than 72 characters (a warning is logged),
      * it has no scheme or no network location (a warning is logged),
      * its path ends in a purely numeric segment such as ``/123``,
      * its path ends with an image or metadata file extension.

    Args:
        url: The absolute URL to check.

    Returns:
        True if the URL passes every check above, False otherwise.
    """
    if len(url) > 72:
        self.logger.warning(f"URL too long: {url}")
        return False
    parsed_url = urlparse(url)
    if not parsed_url.scheme or not parsed_url.netloc:
        self.logger.warning(f"Invalid URL: {url}")
        return False
    if re.search(r'/\d+$', parsed_url.path):
        return False
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')
    metadata_extensions = ('.ico', '.xml', '.json', '.rss', '.atom')
    # Match the extension against the path only: checking the raw URL lets
    # "/logo.png?v=2" slip through and wrongly rejects "/search?q=data.json".
    path = parsed_url.path.lower()
    return not path.endswith(image_extensions + metadata_extensions)
453
454 def get_navigable(self) -> List[str]:
455 """Get all navigable links on the current page."""

Callers 1

get_navigableMethod · 0.95

Calls 1

warningMethod · 0.80

Tested by

no test coverage detected