(self)
| 785 | self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås') |
| 786 | |
def testRewrittenXMLHeader(self):
    """Check that the XML declaration is rewritten when the encoding changes.

    Parses an EUC-JP encoded document with ``BeautifulStoneSoup``, checks
    that ``originalEncoding`` is reported as "euc-jp", and that ``str(soup)``
    equals the expected UTF-8 rendering whose declaration says
    ``encoding='utf-8'``. A second check feeds a windows-1252 document
    through ``assertSoupEquals`` and expects a UTF-8 declaration as well.
    """
    # Test input, kept byte-for-byte. Note that the declaration has no
    # closing quote after 1.0.
    # NOTE(review): presumably deliberate malformed input -- confirm.
    euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
    # Expected output: the same text as UTF-8 bytes under a rewritten header.
    utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"

    soup = BeautifulStoneSoup(euc_jp)
    # Raise with an explanatory message first: per that message, a mismatch
    # can be ignored on interpreters that lack an EUC-JP codec.
    if soup.originalEncoding != "euc-jp":
        raise Exception("Test failed when parsing euc-jp document. "
                        "If you're running Python >=2.4, or you have "
                        "cjkcodecs installed, this is a real problem. "
                        "Otherwise, ignore it.")

    # assertEqual replaces the deprecated assertEquals alias, matching the
    # spelling used by the other tests in this file.
    self.assertEqual(soup.originalEncoding, "euc-jp")
    self.assertEqual(str(soup), utf8)

    # NOTE(review): the expected text holds a literal right single quote;
    # confirm it was not originally an entity that got rendered in transit.
    old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
    new_text = "<?xml version='1.0' encoding='utf-8'?><foo>’</foo>"
    self.assertSoupEquals(old_text, new_text)
| 803 | |
| 804 | def testRewrittenMetaTag(self): |
| 805 | no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' |
nothing calls this directly
no test coverage detected