Method lookslikehtml

module/lib/feedparser.py:970–986 · view source on GitHub ↗

(self, s)

Source from the content-addressed store, hash-verified

968	# the most common cases. As false positives often result in silent
969	# data loss, this function errs on the conservative side.
def lookslikehtml(self, s):
    """Guess whether text declared as plain text is really HTML.

    The check only applies when the feed version does not start with
    'atom' and the current content type is exactly 'text/plain' (a
    missing type is treated as 'text/html' and therefore skipped).

    Returns 1 when every check passes, otherwise None:
      * s contains at least one close tag or entity reference,
      * every tag name in s is in _HTMLSanitizer.acceptable_elements,
      * every named entity in s is a defined HTML entity.
    """
    if self.version.startswith('atom'):
        return
    if self.contentparams.get('type', 'text/html') != 'text/plain':
        return

    # must have a close tag or an entity reference to qualify
    if not (re.search(r'</(\w+)>', s) or re.search(r'&#?\w+;', s)):
        return

    # all tags must be in a restricted subset of valid HTML tags.
    # An explicit loop is used instead of testing the truthiness of
    # filter(): on Python 3 filter() returns an iterator that is always
    # truthy, which would reject every input.
    for tag in re.findall(r'</?(\w+)', s):
        if tag.lower() not in _HTMLSanitizer.acceptable_elements:
            return

    # all entities must have been defined as valid HTML entities
    try:
        from htmlentitydefs import entitydefs  # Python 2
    except ImportError:
        from html.entities import entitydefs  # Python 3
    for entity in re.findall(r'&(\w+);', s):
        # direct dict membership; no need to build a key list per entity
        if entity not in entitydefs:
            return

    return 1
987
988	def _mapToStandardPrefix(self, name):
989	colonpos = name.find(':')

popMethod · 0.95

filterFunction · 0.85

getMethod · 0.45

searchMethod · 0.45

keysMethod · 0.45

no test coverage detected