| 2696 | _BaseHTMLProcessor.handle_data(self, text) |
| 2697 | |
def sanitize_style(self, style):
    """Return a sanitized version of the CSS declaration string *style*.

    The string is first stripped of ``url(...)`` references, then has to
    pass two whole-string checks; if either check fails the result is
    the empty string.  After that each ``property: value`` pair is kept
    only when one of the following holds:

    * the lower-cased property is in ``self.acceptable_css_properties``;
    * the part of the property before the first ``-`` is one of
      ``background``, ``border``, ``margin`` or ``padding`` and every
      whitespace-separated token of the value is either listed in
      ``self.acceptable_css_keywords`` or matched by
      ``self.valid_css_values``;
    * ``self.svgOK`` is true and the lower-cased property is in
      ``self.acceptable_svg_properties``.

    Returns the surviving declarations, each rendered as
    ``"prop: value;"`` and joined with single spaces.
    """
    # All patterns are raw strings: sequences such as "\s" or "\(" inside a
    # normal string literal are invalid escapes that newer Python versions
    # warn about.  The resulting pattern text is unchanged.

    # disallow urls
    style = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)

    # gauntlet: the whole string must be built from whitelisted characters,
    # hyphenated words, quoted words or parenthesised digit lists
    gauntlet = r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$"""
    if not re.match(gauntlet, style):
        return ''

    # This replaced a regexp that used re.match and was prone to
    # pathological back-tracking: delete every "prop: value;" shaped piece
    # and reject the style if anything other than whitespace is left over.
    if re.sub(r"\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip():
        return ''

    clean = []
    for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
        if not value:
            continue
        declaration = prop + ': ' + value + ';'
        if prop.lower() in self.acceptable_css_properties:
            clean.append(declaration)
        elif prop.split('-')[0].lower() in ('background', 'border', 'margin', 'padding'):
            # Shorthand families: accept only if every token of the value
            # is a known keyword or looks like a valid CSS value.
            if all(keyword in self.acceptable_css_keywords
                   or self.valid_css_values.match(keyword)
                   for keyword in value.split()):
                clean.append(declaration)
        elif self.svgOK and prop.lower() in self.acceptable_svg_properties:
            clean.append(declaration)

    return ' '.join(clean)
| 2723 | |
| 2724 | |
| 2725 | def _sanitizeHTML(htmlSource, encoding, _type): |