(self, markup, overrideEncodings=[],
smartQuotesTo='xml', isHTML=False)
| 1762 | "x-sjis" : "shift-jis" } |
| 1763 | |
def __init__(self, markup, overrideEncodings=[],
             smartQuotesTo='xml', isHTML=False):
    """Convert ``markup`` to Unicode, trying a sequence of encodings.

    The candidates are attempted in this order, stopping at the first
    one for which ``self._convertFrom`` returns a truthy result:

    1. every entry of ``overrideEncodings``, in order;
    2. the two encodings reported by ``self._detectEncoding``
       (document-declared first, then sniffed);
    3. whatever ``chardet.detect`` guesses, if ``chardet`` is truthy
       and ``self.markup`` is not already a ``unicode`` object;
    4. ``"utf-8"`` and then ``"windows-1252"``.

    The outcome is stored in ``self.unicode``.  When nothing worked,
    ``self.originalEncoding`` is reset to ``None``.
    NOTE(review): on success ``originalEncoding`` is presumably set
    inside ``_convertFrom`` -- confirm against that method.

    If ``markup`` is the empty string or already ``unicode`` no
    conversion is attempted: ``self.unicode`` is ``unicode(markup)``
    and ``self.originalEncoding`` is ``None``.
    """
    # Must be initialised before _detectEncoding runs.
    # NOTE(review): _detectEncoding may overwrite it -- confirm.
    self.declaredHTMLEncoding = None
    detection = self._detectEncoding(markup, isHTML)
    self.markup, documentEncoding, sniffedEncoding = detection
    self.smartQuotesTo = smartQuotesTo
    self.triedEncodings = []

    # Nothing to decode: empty input or text that is already Unicode.
    if markup == '' or isinstance(markup, unicode):
        self.originalEncoding = None
        self.unicode = unicode(markup)
        return

    # `decoded` always holds the most recent _convertFrom result, so a
    # falsy value from the last failed attempt is what ends up stored.
    decoded = None

    # Caller-supplied encodings take priority over everything else.
    for candidate in overrideEncodings:
        decoded = self._convertFrom(candidate)
        if decoded:
            break

    # Next, whatever the document itself declared or its bytes suggest.
    if not decoded:
        for candidate in (documentEncoding, sniffedEncoding):
            decoded = self._convertFrom(candidate)
            if decoded:
                break

    # If no luck and we have auto-detection library, try that:
    if not decoded and chardet and not isinstance(self.markup, unicode):
        guess = chardet.detect(self.markup)
        decoded = self._convertFrom(guess['encoding'])

    # As a last resort, try utf-8 and windows-1252:
    if not decoded:
        for candidate in ("utf-8", "windows-1252"):
            decoded = self._convertFrom(candidate)
            if decoded:
                break

    self.unicode = decoded
    if not decoded:
        self.originalEncoding = None
| 1797 | |
| 1798 | def _subMSChar(self, orig): |
| 1799 | """Changes a MS smart quote character to an XML or HTML |
nothing calls this directly
no test coverage detected