MCPcopy
hub / github.com/zer0yu/CyberSecurityRSS / parse_feed_metadata_stream

Function parse_feed_metadata_stream

scripts/add_feed_to_tiny.py:193–281  ·  view source on GitHub ↗
(
    stream: BinaryIO,
    feed_url: str,
    max_bytes: int = MAX_FEED_BYTES,
)

Source from the content-addressed store, hash-verified

191
192
193def parse_feed_metadata_stream(
194 stream: BinaryIO,
195 feed_url: str,
196 max_bytes: int = MAX_FEED_BYTES,
197) -> FeedMetadata:
198 parser = ET.XMLPullParser(events=("start", "end"))
199 path: List[str] = []
200 root_name = ""
201 title = ""
202 html_url = ""
203 atom_fallback_html_url = ""
204 bytes_read = 0
205
206 while True:
207 chunk = stream.read(READ_CHUNK_SIZE)
208 if not chunk:
209 break
210 bytes_read += len(chunk)
211 if bytes_read > max_bytes:
212 raise ValueError(f"Feed payload is too large (> {max_bytes} bytes)")
213
214 try:
215 parser.feed(chunk)
216 except ET.ParseError as exc:
217 raise ValueError(f"RSS/Atom XML parse failed: {exc}") from exc
218
219 for event, elem in parser.read_events():
220 local_name = strip_namespace(elem.tag)
221
222 if event == "start":
223 path.append(local_name)
224
225 if len(path) == 1:
226 root_name = local_name
227 if root_name not in {"rss", "feed", "rdf"}:
228 raise ValueError(f"Unsupported feed root tag: {root_name}")
229 elif root_name == "feed" and len(path) == 2:
230 if local_name == "entry":
231 return _build_feed_metadata(
232 root_name,
233 title,
234 html_url or atom_fallback_html_url,
235 feed_url,
236 )
237 if local_name == "link":
238 href = normalize_url(elem.attrib.get("href", ""))
239 if href:
240 rel = (elem.attrib.get("rel", "alternate") or "alternate").strip().lower()
241 if rel in {"alternate", ""}:
242 html_url = href
243 elif not atom_fallback_html_url:
244 atom_fallback_html_url = href
245 if title and html_url:
246 return _build_feed_metadata(root_name, title, html_url, feed_url)
247 continue
248
249 if root_name in {"rss", "rdf"}:
250 if len(path) == 3 and path[1] == "channel":

Callers 1

fetch_feed_metadata — Function · 0.85

Calls 4

_build_feed_metadata — Function · 0.85
read — Method · 0.80
strip_namespace — Function · 0.70
normalize_url — Function · 0.70

Tested by

no test coverage detected