| 475 | self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) |
| 476 | |
def test_real_hebrew_document(self):
    # A real-world test to make sure we can convert ISO-8859-8 (a
    # Hebrew encoding) to UTF-8.
    hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'

    # Parse the raw bytes with the source encoding stated explicitly.
    soup = self.soup(hebrew_document, from_encoding="iso8859-8")

    # The soup should report the encoding name it was handed.
    self.assertEqual(soup.original_encoding, 'iso8859-8')

    # Serializing the soup as UTF-8 must give the same bytes as decoding
    # the original document from ISO-8859-8 and re-encoding it directly.
    expected_utf8 = hebrew_document.decode("iso8859-8").encode("utf-8")
    self.assertEqual(soup.encode('utf-8'), expected_utf8)
| 487 | |
| 488 | def test_meta_tag_reflects_current_encoding(self): |
| 489 | # Here's the <meta> tag saying that a document is |