MCPcopy Index your code
hub / github.com/csev/py4e / __init__

Method __init__

code/BeautifulSoup.py:1764–1796  ·  view source on GitHub ↗
(self, markup, overrideEncodings=[],
                 smartQuotesTo='xml', isHTML=False)

Source from the content-addressed store, hash-verified

1762 "x-sjis" : "shift-jis" }
1763
def __init__(self, markup, overrideEncodings=[],
             smartQuotesTo='xml', isHTML=False):
    """Convert markup to Unicode by trying candidate encodings in order.

    Candidates are attempted in the following order, stopping at the
    first one for which _convertFrom returns a true value:

      1. each entry of overrideEncodings,
      2. the two encodings returned by _detectEncoding,
      3. the 'encoding' entry of chardet.detect (only when chardet is
         available and self.markup is not already unicode),
      4. "utf-8" and then "windows-1252".

    The outcome is stored in self.unicode; when no candidate worked,
    self.originalEncoding is set to None. Markup that is empty or
    already unicode skips the search entirely.

    The overrideEncodings default list is only iterated here, never
    mutated.
    """
    self.declaredHTMLEncoding = None
    detected = self._detectEncoding(markup, isHTML)
    self.markup, documentEncoding, sniffedEncoding = detected
    self.smartQuotesTo = smartQuotesTo
    self.triedEncodings = []

    # Nothing to decode: empty input, or text that is already unicode.
    if markup == '' or isinstance(markup, unicode):
        self.originalEncoding = None
        self.unicode = unicode(markup)
        return

    def first_conversion(encodings):
        # Return the first true result of _convertFrom; otherwise return
        # whatever the last attempt produced (None if nothing was tried).
        converted = None
        for encoding in encodings:
            converted = self._convertFrom(encoding)
            if converted:
                return converted
        return converted

    text = first_conversion(overrideEncodings)
    if not text:
        text = first_conversion((documentEncoding, sniffedEncoding))

    # If no luck and we have auto-detection library, try that:
    if not text and chardet and not isinstance(self.markup, unicode):
        guess = chardet.detect(self.markup)['encoding']
        text = self._convertFrom(guess)

    # As a last resort, try utf-8 and windows-1252:
    if not text:
        text = first_conversion(("utf-8", "windows-1252"))

    self.unicode = text
    if not text:
        self.originalEncoding = None
1797
1798 def _subMSChar(self, orig):
1799 """Changes a MS smart quote character to an XML or HTML

Callers

nothing calls this directly

Calls 2

_detectEncodingMethod · 0.95
_convertFromMethod · 0.95

Tested by

no test coverage detected