MCPcopy
hub / github.com/pyload/pyload / parse

Function parse

module/lib/feedparser.py:3597–3803  ·  view source on GitHub ↗

Parse a feed from a URL, file, stream, or string. If given, request_headers is a dict mapping HTTP header names to values that are added to the request; these override internally generated values.

(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={})

Source from the content-addressed store, hash-verified

3595 return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)])
3596
3597def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}):
3598 '''Parse a feed from a URL, file, stream, or string.
3599
3600 request_headers, if given, is a dict from http header name to value to add
3601 to the request; this overrides internally generated values.
3602 '''
3603 result = FeedParserDict()
3604 result['feed'] = FeedParserDict()
3605 result['entries'] = []
3606 if _XML_AVAILABLE:
3607 result['bozo'] = 0
3608 if not isinstance(handlers, list):
3609 handlers = [handlers]
3610 try:
3611 f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers)
3612 data = f.read()
3613 except Exception, e:
3614 result['bozo'] = 1
3615 result['bozo_exception'] = e
3616 data = None
3617 f = None
3618
3619 if hasattr(f, 'headers'):
3620 result['headers'] = dict(f.headers)
3621 # overwrite existing headers using response_headers
3622 if 'headers' in result:
3623 result['headers'].update(response_headers)
3624 elif response_headers:
3625 result['headers'] = copy.deepcopy(response_headers)
3626
3627 # if feed is gzip-compressed, decompress it
3628 if f and data and 'headers' in result:
3629 if gzip and result['headers'].get('content-encoding') == 'gzip':
3630 try:
3631 data = gzip.GzipFile(fileobj=_StringIO(data)).read()
3632 except Exception, e:
3633 # Some feeds claim to be gzipped but they're not, so
3634 # we get garbage. Ideally, we should re-request the
3635 # feed without the 'Accept-encoding: gzip' header,
3636 # but we don't.
3637 result['bozo'] = 1
3638 result['bozo_exception'] = e
3639 data = ''
3640 elif zlib and result['headers'].get('content-encoding') == 'deflate':
3641 try:
3642 data = zlib.decompress(data, -zlib.MAX_WBITS)
3643 except Exception, e:
3644 result['bozo'] = 1
3645 result['bozo_exception'] = e
3646 data = ''
3647
3648 # save HTTP headers
3649 if 'headers' in result:
3650 if 'etag' in result['headers'] or 'ETag' in result['headers']:
3651 etag = result['headers'].get('etag', result['headers'].get('ETag'))
3652 if etag:
3653 result['etag'] = etag
3654 if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']:

Callers 2

feedparser.py · File · 0.85
parse_tuple · Method · 0.85

Calls 15

get · Method · 0.95
_open_resource · Function · 0.85
dict · Function · 0.85
_parse_date · Function · 0.85
_getCharacterEncoding · Function · 0.85
NonXMLContentType · Class · 0.85
_stripDoctype · Function · 0.85
_makeSafeAbsoluteURI · Function · 0.85
_toUTF8 · Function · 0.85
_StrictFeedParser · Class · 0.85

Tested by

no test coverage detected