MCPcopy Index your code
hub / github.com/zer0yu/CyberSecurityRSS / HttpFeedChecker

Class HttpFeedChecker

scripts/opml_sync.py:194–280  ·  view source on GitHub ↗

Checks feed URL reachability and validates RSS/Atom root tag.

Source from the content-addressed store, hash-verified

192
193
194class HttpFeedChecker:
195 """Checks feed URL reachability and validates RSS/Atom root tag."""
196
197 def __init__(self, timeout: float, retries: int, user_agent: str, max_probe_bytes: int) -> None:
198 self.timeout = timeout
199 self.retries = retries
200 self.user_agent = user_agent
201 self.max_probe_bytes = max_probe_bytes
202
203 def __call__(self, url: str) -> FeedCheckResult:
204 if not is_http_url(url):
205 return FeedCheckResult(alive=False, kind="hard_fail", reason="unsupported_url_scheme")
206
207 headers = {
208 "User-Agent": self.user_agent,
209 "Accept": "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8",
210 "Accept-Encoding": "gzip, deflate, identity",
211 }
212
213 for attempt in range(1, self.retries + 1):
214 try:
215 request = urllib.request.Request(url=url, headers=headers, method="GET")
216 with urllib.request.urlopen(request, timeout=self.timeout) as response:
217 status = getattr(response, "status", response.getcode())
218 status_code = int(status) if status is not None else 0
219 if status_code < 200 or status_code >= 400:
220 return classify_http_error(status_code)
221
222 root_tag = first_root_tag_from_response(response, self.max_probe_bytes)
223 if root_tag in FEED_ROOT_TAGS:
224 return FeedCheckResult(
225 alive=True,
226 kind="alive",
227 reason="ok",
228 status_code=status_code,
229 )
230 if root_tag:
231 return FeedCheckResult(
232 alive=False,
233 kind="hard_fail",
234 reason=f"non_feed_root:{root_tag}",
235 status_code=status_code,
236 )
237
238 content_type = (response.headers.get("Content-Type") or "").lower()
239 if "html" in content_type or "json" in content_type:
240 return FeedCheckResult(
241 alive=False,
242 kind="hard_fail",
243 reason=f"non_xml_content_type:{content_type}",
244 status_code=status_code,
245 )
246 # Unknown body shape: avoid destructive delete on first signal.
247 return FeedCheckResult(
248 alive=False,
249 kind="transient_fail",
250 reason="root_tag_not_found",
251 status_code=status_code,

Callers 1

run_sync · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected