Apply the steps of ftfy that detect mojibake and fix it. Returns the fixed text and a list explaining what was fixed. This includes fixing text by encoding and decoding it in different encodings, as well as the subordinate fixes `restore_byte_a0`, `replace_lossy_sequences`, `decode_inconsistent_utf8`, and `fix_c1_controls`.
(
text: str, config: TextFixerConfig | None = None, **kwargs: Any
)
| 422 | |
| 423 | |
def fix_encoding_and_explain(
    text: str, config: TextFixerConfig | None = None, **kwargs: Any
) -> ExplainedText:
    """
    Apply the steps of ftfy that detect mojibake and fix it. Returns the fixed
    text and a list explaining what was fixed.

    This includes fixing text by encoding and decoding it in different encodings,
    as well as the subordinate fixes `restore_byte_a0`, `replace_lossy_sequences`,
    `decode_inconsistent_utf8`, and `fix_c1_controls`.

    If `config` is None, a default `TextFixerConfig()` is used. Any keyword
    arguments are combined with the config by `_config_from_kwargs`. Passing
    `bytes` instead of `str` raises `UnicodeError`.

    When `config.fix_encoding` is false, the text is returned unchanged with
    an empty explanation.

    Examples::

        >>> fix_encoding_and_explain("sÃ³")
        ExplainedText(text='só', explanation=[('encode', 'latin-1'), ('decode', 'utf-8')])

        >>> result = fix_encoding_and_explain("voilÃ le travail")
        >>> result.text
        'voilà le travail'
        >>> result.explanation
        [('encode', 'latin-1'), ('transcode', 'restore_byte_a0'), ('decode', 'utf-8')]

    """
    if config is None:
        config = TextFixerConfig()
    if isinstance(text, bytes):
        raise UnicodeError(BYTES_ERROR_TEXT)
    config = _config_from_kwargs(config, kwargs)

    if not config.fix_encoding:
        # A weird trivial case: we're asked to fix the encoding, but skip
        # fixing the encoding
        return ExplainedText(text, [])

    # Text can be mis-decoded more than once, so keep applying single fixing
    # steps, accumulating their explanations, until a step leaves the text
    # unchanged.
    plan_so_far: list[ExplanationStep] = []
    while True:
        prevtext = text
        text, plan = _fix_encoding_one_step_and_explain(text, config)
        if plan is not None:
            plan_so_far.extend(plan)
        if text == prevtext:
            return ExplainedText(text, plan_so_far)
| 466 | |
| 467 | |
| 468 | def _fix_encoding_one_step_and_explain( |
no test coverage detected