Parse a feed from a URL, file, stream, or string. request_headers, if given, is a dict mapping HTTP header names to values to add to the request; these override any internally generated values.
(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None)
| 3779 | return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) |
| 3780 | |
| 3781 | def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None): |
| 3782 | '''Parse a feed from a URL, file, stream, or string. |
| 3783 | |
| 3784 | request_headers, if given, is a dict from http header name to value to add |
| 3785 | to the request; this overrides internally generated values. |
| 3786 | ''' |
| 3787 | |
| 3788 | if handlers is None: |
| 3789 | handlers = [] |
| 3790 | if request_headers is None: |
| 3791 | request_headers = {} |
| 3792 | if response_headers is None: |
| 3793 | response_headers = {} |
| 3794 | |
| 3795 | result = FeedParserDict() |
| 3796 | result['feed'] = FeedParserDict() |
| 3797 | result['entries'] = [] |
| 3798 | result['bozo'] = 0 |
| 3799 | if not isinstance(handlers, list): |
| 3800 | handlers = [handlers] |
| 3801 | try: |
| 3802 | f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) |
| 3803 | data = f.read() |
| 3804 | except Exception, e: |
| 3805 | result['bozo'] = 1 |
| 3806 | result['bozo_exception'] = e |
| 3807 | data = None |
| 3808 | f = None |
| 3809 | |
| 3810 | if hasattr(f, 'headers'): |
| 3811 | result['headers'] = dict(f.headers) |
| 3812 | # overwrite existing headers using response_headers |
| 3813 | if 'headers' in result: |
| 3814 | result['headers'].update(response_headers) |
| 3815 | elif response_headers: |
| 3816 | result['headers'] = copy.deepcopy(response_headers) |
| 3817 | |
| 3818 | # lowercase all of the HTTP headers for comparisons per RFC 2616 |
| 3819 | if 'headers' in result: |
| 3820 | http_headers = dict((k.lower(), v) for k, v in result['headers'].items()) |
| 3821 | else: |
| 3822 | http_headers = {} |
| 3823 | |
| 3824 | # if feed is gzip-compressed, decompress it |
| 3825 | if f and data and http_headers: |
| 3826 | if gzip and 'gzip' in http_headers.get('content-encoding', ''): |
| 3827 | try: |
| 3828 | data = gzip.GzipFile(fileobj=_StringIO(data)).read() |
| 3829 | except (IOError, struct.error), e: |
| 3830 | # IOError can occur if the gzip header is bad. |
| 3831 | # struct.error can occur if the data is damaged. |
| 3832 | result['bozo'] = 1 |
| 3833 | result['bozo_exception'] = e |
| 3834 | if isinstance(e, struct.error): |
| 3835 | # A gzip header was found but the data is corrupt. |
| 3836 | # Ideally, we should re-request the feed without the |
| 3837 | # 'Accept-encoding: gzip' header, but we don't. |
| 3838 | data = None |
no test coverage detected
searching dependent graphs…