(self, url: str)
| 201 | self.max_probe_bytes = max_probe_bytes |
| 202 | |
| 203 | def __call__(self, url: str) -> FeedCheckResult: |
| 204 | if not is_http_url(url): |
| 205 | return FeedCheckResult(alive=False, kind="hard_fail", reason="unsupported_url_scheme") |
| 206 | |
| 207 | headers = { |
| 208 | "User-Agent": self.user_agent, |
| 209 | "Accept": "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8", |
| 210 | "Accept-Encoding": "gzip, deflate, identity", |
| 211 | } |
| 212 | |
| 213 | for attempt in range(1, self.retries + 1): |
| 214 | try: |
| 215 | request = urllib.request.Request(url=url, headers=headers, method="GET") |
| 216 | with urllib.request.urlopen(request, timeout=self.timeout) as response: |
| 217 | status = getattr(response, "status", response.getcode()) |
| 218 | status_code = int(status) if status is not None else 0 |
| 219 | if status_code < 200 or status_code >= 400: |
| 220 | return classify_http_error(status_code) |
| 221 | |
| 222 | root_tag = first_root_tag_from_response(response, self.max_probe_bytes) |
| 223 | if root_tag in FEED_ROOT_TAGS: |
| 224 | return FeedCheckResult( |
| 225 | alive=True, |
| 226 | kind="alive", |
| 227 | reason="ok", |
| 228 | status_code=status_code, |
| 229 | ) |
| 230 | if root_tag: |
| 231 | return FeedCheckResult( |
| 232 | alive=False, |
| 233 | kind="hard_fail", |
| 234 | reason=f"non_feed_root:{root_tag}", |
| 235 | status_code=status_code, |
| 236 | ) |
| 237 | |
| 238 | content_type = (response.headers.get("Content-Type") or "").lower() |
| 239 | if "html" in content_type or "json" in content_type: |
| 240 | return FeedCheckResult( |
| 241 | alive=False, |
| 242 | kind="hard_fail", |
| 243 | reason=f"non_xml_content_type:{content_type}", |
| 244 | status_code=status_code, |
| 245 | ) |
| 246 | # Unknown body shape: avoid destructive delete on first signal. |
| 247 | return FeedCheckResult( |
| 248 | alive=False, |
| 249 | kind="transient_fail", |
| 250 | reason="root_tag_not_found", |
| 251 | status_code=status_code, |
| 252 | ) |
| 253 | except urllib.error.HTTPError as exc: |
| 254 | result = classify_http_error(int(exc.code)) |
| 255 | except ( |
| 256 | urllib.error.URLError, |
| 257 | socket.timeout, |
| 258 | TimeoutError, |
| 259 | ConnectionError, |
| 260 | OSError, |
nothing calls this directly
no test coverage detected