MCPcopy Index your code
hub / github.com/pyload/pyload / _sanitizeHTML

Function _sanitizeHTML

module/lib/feedparser.py:2725–2763  ·  view source on GitHub ↗
(htmlSource, encoding, _type)

Source from the content-addressed store, hash-verified

2723
2724
def _sanitizeHTML(htmlSource, encoding, _type):
    """Run markup through _HTMLSanitizer and, optionally, an HTML Tidy binding.

    htmlSource -- the markup string to clean.
    encoding   -- passed straight through to _HTMLSanitizer.
    _type      -- passed straight through to _HTMLSanitizer.

    When the module-level TIDY_MARKUP flag is true and one of the bindings
    named in PREFERRED_TIDY_INTERFACES can be imported, the sanitizer output
    is additionally passed through Tidy and reduced to the contents of the
    resulting <body> element.

    Returns the processed markup with surrounding whitespace stripped and
    CRLF line endings converted to LF.
    """
    p = _HTMLSanitizer(encoding, _type)
    # Escape the leading '<' of CDATA openers before the parser sees them.
    htmlSource = htmlSource.replace('<![CDATA[', '&lt;![CDATA[')
    p.feed(htmlSource)
    data = p.output()
    if TIDY_MARKUP:
        # Loop through the list of preferred Tidy interfaces looking for one
        # that is installed, then set up a common _tidy function to wrap the
        # interface-specific API.
        _tidy = None
        for tidy_interface in PREFERRED_TIDY_INTERFACES:
            try:
                if tidy_interface == "uTidy":
                    from tidy import parseString as _utidy
                    def _tidy(data, **kwargs):
                        return str(_utidy(data, **kwargs))
                    break
                elif tidy_interface == "mxTidy":
                    from mx.Tidy import Tidy as _mxtidy
                    def _tidy(data, **kwargs):
                        nerrors, nwarnings, data, errordata = _mxtidy.tidy(data, **kwargs)
                        return data
                    break
            except Exception:
                # Best effort: a missing or broken binding just means we try
                # the next interface. Catching Exception (not a bare except)
                # lets KeyboardInterrupt and SystemExit propagate.
                pass
        if _tidy:
            # Tidy is handed UTF-8 bytes (char_encoding="utf8"); remember
            # whether the input was unicode so it can be decoded afterwards.
            utf8 = isinstance(data, unicode)
            if utf8:
                data = data.encode('utf-8')
            data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8")
            if utf8:
                data = unicode(data, 'utf-8')
            # Keep only what sits inside <body ...> ... </body>; presumably
            # Tidy wraps the fragment in a full document -- TODO confirm.
            if '<body' in data:
                data = data.split('<body', 1)[1]
                # Drop the remainder of the opening <body ...> tag.
                if '>' in data:
                    data = data.split('>', 1)[1]
            if '</body' in data:
                data = data.split('</body', 1)[0]
    data = data.strip().replace('\r\n', '\n')
    return data
2764
2765class _FeedURLHandler(urllib2.HTTPDigestAuthHandler, urllib2.HTTPRedirectHandler, urllib2.HTTPDefaultErrorHandler):
2766 def http_error_default(self, req, fp, code, msg, headers):

Callers 1

popMethod · 0.85

Calls 7

_HTMLSanitizerClass · 0.85
_tidyFunction · 0.85
feedMethod · 0.80
outputMethod · 0.80
splitMethod · 0.80
replaceMethod · 0.45
encodeMethod · 0.45

Tested by

no test coverage detected