Function parse

module/lib/feedparser.py:3597–3803 · view source on GitHub ↗

Parse a feed from a URL, file, stream, or string. request_headers, if given, is a dict mapping HTTP header names to values to add to the request; these values override internally generated ones.

(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={})

Source from the content-addressed store, hash-verified

3595	return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)])
3596
3597	def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}):
3598	'''Parse a feed from a URL, file, stream, or string.
3599
3600	request_headers, if given, is a dict from http header name to value to add
3601	to the request; this overrides internally generated values.
3602	'''
3603	result = FeedParserDict()
3604	result['feed'] = FeedParserDict()
3605	result['entries'] = []
3606	if _XML_AVAILABLE:
3607	result['bozo'] = 0
3608	if not isinstance(handlers, list):
3609	handlers = [handlers]
3610	try:
3611	f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers)
3612	data = f.read()
3613	except Exception, e:
3614	result['bozo'] = 1
3615	result['bozo_exception'] = e
3616	data = None
3617	f = None
3618
3619	if hasattr(f, 'headers'):
3620	result['headers'] = dict(f.headers)
3621	# overwrite existing headers using response_headers
3622	if 'headers' in result:
3623	result['headers'].update(response_headers)
3624	elif response_headers:
3625	result['headers'] = copy.deepcopy(response_headers)
3626
3627	# if feed is gzip-compressed, decompress it
3628	if f and data and 'headers' in result:
3629	if gzip and result['headers'].get('content-encoding') == 'gzip':
3630	try:
3631	data = gzip.GzipFile(fileobj=_StringIO(data)).read()
3632	except Exception, e:
3633	# Some feeds claim to be gzipped but they're not, so
3634	# we get garbage. Ideally, we should re-request the
3635	# feed without the 'Accept-encoding: gzip' header,
3636	# but we don't.
3637	result['bozo'] = 1
3638	result['bozo_exception'] = e
3639	data = ''
3640	elif zlib and result['headers'].get('content-encoding') == 'deflate':
3641	try:
3642	data = zlib.decompress(data, -zlib.MAX_WBITS)
3643	except Exception, e:
3644	result['bozo'] = 1
3645	result['bozo_exception'] = e
3646	data = ''
3647
3648	# save HTTP headers
3649	if 'headers' in result:
3650	if 'etag' in result['headers'] or 'ETag' in result['headers']:
3651	etag = result['headers'].get('etag', result['headers'].get('ETag'))
3652	if etag:
3653	result['etag'] = etag
3654	if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']:

Callers 2

feedparser.py · File · 0.85

parse_tuple · Method · 0.85

Calls 15

get · Method · 0.95

_open_resource · Function · 0.85

dict · Function · 0.85

_parse_date · Function · 0.85

_getCharacterEncoding · Function · 0.85

NonXMLContentType · Class · 0.85

_stripDoctype · Function · 0.85

_makeSafeAbsoluteURI · Function · 0.85

_toUTF8 · Function · 0.85

CharacterEncodingUnknown · Class · 0.85

CharacterEncodingOverride · Class · 0.85

_StrictFeedParser · Class · 0.85

Tested by

no test coverage detected