| 458 | self.assertEqual(result, expected) |
| 459 | |
def test_real_shift_jis_document(self):
    # Smoke test: markup that started life as Shift-JIS bytes is
    # decoded to a Unicode string, parsed, and then written back out
    # in other encodings without the content being damaged.
    page_open = b'<html><head></head><body><pre>'
    # The text inside <pre>, as Shift-JIS-encoded bytes.
    page_text = (
        b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
        b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
        b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B')
    page_close = b'</pre></body></html>'
    sjis_markup = page_open + page_text + page_close

    # The parser is handed the decoded string, not the raw bytes.
    markup = sjis_markup.decode("shift-jis")
    tree = self.soup(markup)

    # Serializing the tree must give exactly the same bytes as
    # encoding the original string directly, for each target codec.
    for codec in ("utf-8", "euc_jp"):
        self.assertEqual(tree.encode(codec), markup.encode(codec))
| 476 | |
| 477 | def test_real_hebrew_document(self): |
| 478 | # A real-world test to make sure we can convert ISO-8859-9 (a |