Parse a feed from a URL, file, stream, or string. If given, request_headers is a dict mapping HTTP header names to values that are added to the request; these entries override any internally generated values.
(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={})
| 3595 | return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) |
| 3596 | |
| 3597 | def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): |
| 3598 | '''Parse a feed from a URL, file, stream, or string. |
| 3599 | |
| 3600 | request_headers, if given, is a dict from http header name to value to add |
| 3601 | to the request; this overrides internally generated values. |
| 3602 | ''' |
| 3603 | result = FeedParserDict() |
| 3604 | result['feed'] = FeedParserDict() |
| 3605 | result['entries'] = [] |
| 3606 | if _XML_AVAILABLE: |
| 3607 | result['bozo'] = 0 |
| 3608 | if not isinstance(handlers, list): |
| 3609 | handlers = [handlers] |
| 3610 | try: |
| 3611 | f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) |
| 3612 | data = f.read() |
| 3613 | except Exception, e: |
| 3614 | result['bozo'] = 1 |
| 3615 | result['bozo_exception'] = e |
| 3616 | data = None |
| 3617 | f = None |
| 3618 | |
| 3619 | if hasattr(f, 'headers'): |
| 3620 | result['headers'] = dict(f.headers) |
| 3621 | # overwrite existing headers using response_headers |
| 3622 | if 'headers' in result: |
| 3623 | result['headers'].update(response_headers) |
| 3624 | elif response_headers: |
| 3625 | result['headers'] = copy.deepcopy(response_headers) |
| 3626 | |
| 3627 | # if feed is gzip-compressed, decompress it |
| 3628 | if f and data and 'headers' in result: |
| 3629 | if gzip and result['headers'].get('content-encoding') == 'gzip': |
| 3630 | try: |
| 3631 | data = gzip.GzipFile(fileobj=_StringIO(data)).read() |
| 3632 | except Exception, e: |
| 3633 | # Some feeds claim to be gzipped but they're not, so |
| 3634 | # we get garbage. Ideally, we should re-request the |
| 3635 | # feed without the 'Accept-encoding: gzip' header, |
| 3636 | # but we don't. |
| 3637 | result['bozo'] = 1 |
| 3638 | result['bozo_exception'] = e |
| 3639 | data = '' |
| 3640 | elif zlib and result['headers'].get('content-encoding') == 'deflate': |
| 3641 | try: |
| 3642 | data = zlib.decompress(data, -zlib.MAX_WBITS) |
| 3643 | except Exception, e: |
| 3644 | result['bozo'] = 1 |
| 3645 | result['bozo_exception'] = e |
| 3646 | data = '' |
| 3647 | |
| 3648 | # save HTTP headers |
| 3649 | if 'headers' in result: |
| 3650 | if 'etag' in result['headers'] or 'ETag' in result['headers']: |
| 3651 | etag = result['headers'].get('etag', result['headers'].get('ETag')) |
| 3652 | if etag: |
| 3653 | result['etag'] = etag |
| 3654 | if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']: |
no test coverage detected