| 968 | # the most common cases. As false positives often result in silent |
| 969 | # data loss, this function errs on the conservative side. |
def lookslikehtml(self, s):
    """Conservatively guess whether the plain-text string ``s`` is HTML.

    Returns 1 when ``s`` looks like HTML and None otherwise.

    The guess is only attempted when ``self.version`` does not start with
    'atom' and ``self.contentparams`` reports a 'type' of 'text/plain'
    (a missing 'type' is treated as 'text/html', i.e. no guess is made).
    To qualify, ``s`` must contain a close tag or an entity reference,
    every tag name must be listed in ``_HTMLSanitizer.acceptable_elements``
    and every named entity must be a known HTML entity.
    """
    if self.version.startswith('atom'):
        return
    if self.contentparams.get('type', 'text/html') != 'text/plain':
        return

    # must have a close tag or an entity reference to qualify
    if not (re.search(r'</(\w+)>', s) or re.search(r'&#?\w+;', s)):
        return

    # all tags must be in a restricted subset of valid HTML tags.
    # A real list is built (instead of testing the result of filter())
    # because a lazy filter object is always true, which would reject
    # every input.
    tags = re.findall(r'</?(\w+)', s)
    if [t for t in tags
            if t.lower() not in _HTMLSanitizer.acceptable_elements]:
        return

    # all entities must have been defined as valid HTML entities;
    # the entity table lives in a different module on Python 2 and 3
    try:
        from htmlentitydefs import entitydefs
    except ImportError:
        from html.entities import entitydefs
    entities = re.findall(r'&(\w+);', s)
    if [e for e in entities if e not in entitydefs]:
        return

    return 1
| 987 | |
| 988 | def _mapToStandardPrefix(self, name): |
| 989 | colonpos = name.find(':') |