MCPcopy Index your code
hub / github.com/clips/pattern / parse

Function parse

pattern/web/feed/feedparser.py:3781–4017  ·  view source on GitHub ↗

Parse a feed from a URL, file, stream, or string. request_headers, if given, is a dict mapping HTTP header names to values to add to the request; these values override internally generated ones.

(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None)

Source from the content-addressed store, hash-verified

3779 return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)])
3780
3781def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None):
3782 '''Parse a feed from a URL, file, stream, or string.
3783
3784 request_headers, if given, is a dict from http header name to value to add
3785 to the request; this overrides internally generated values.
3786 '''
3787
3788 if handlers is None:
3789 handlers = []
3790 if request_headers is None:
3791 request_headers = {}
3792 if response_headers is None:
3793 response_headers = {}
3794
3795 result = FeedParserDict()
3796 result['feed'] = FeedParserDict()
3797 result['entries'] = []
3798 result['bozo'] = 0
3799 if not isinstance(handlers, list):
3800 handlers = [handlers]
3801 try:
3802 f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers)
3803 data = f.read()
3804 except Exception, e:
3805 result['bozo'] = 1
3806 result['bozo_exception'] = e
3807 data = None
3808 f = None
3809
3810 if hasattr(f, 'headers'):
3811 result['headers'] = dict(f.headers)
3812 # overwrite existing headers using response_headers
3813 if 'headers' in result:
3814 result['headers'].update(response_headers)
3815 elif response_headers:
3816 result['headers'] = copy.deepcopy(response_headers)
3817
3818 # lowercase all of the HTTP headers for comparisons per RFC 2616
3819 if 'headers' in result:
3820 http_headers = dict((k.lower(), v) for k, v in result['headers'].items())
3821 else:
3822 http_headers = {}
3823
3824 # if feed is gzip-compressed, decompress it
3825 if f and data and http_headers:
3826 if gzip and 'gzip' in http_headers.get('content-encoding', ''):
3827 try:
3828 data = gzip.GzipFile(fileobj=_StringIO(data)).read()
3829 except (IOError, struct.error), e:
3830 # IOError can occur if the gzip header is bad.
3831 # struct.error can occur if the data is damaged.
3832 result['bozo'] = 1
3833 result['bozo_exception'] = e
3834 if isinstance(e, struct.error):
3835 # A gzip header was found but the data is corrupt.
3836 # Ideally, we should re-request the feed without the
3837 # 'Accept-encoding: gzip' header, but we don't.
3838 data = None

Callers 1

stream · Function · 0.50

Calls 15

get · Method · 0.95
FeedParserDict · Class · 0.85
_open_resource · Function · 0.85
_parse_date · Function · 0.85
_getCharacterEncoding · Function · 0.85
NonXMLContentType · Class · 0.85
_makeSafeAbsoluteURI · Function · 0.85
_toUTF8 · Function · 0.85
_stripDoctype · Function · 0.85
_StrictFeedParser · Class · 0.85

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…