Fix text that is found in a file. If the file is being read as Unicode text, use that. If it's being read as bytes, then we hope an encoding was supplied. If not, unfortunately, we have to guess what encoding it is. We'll try a few common encodings, but we make no promises. See the `guess_bytes` function for how this is done.
(
input_file: TextIO | BinaryIO,
encoding: str | None = None,
config: TextFixerConfig | None = None,
**kwargs: Any,
)
| 621 | |
| 622 | |
def fix_file(
    input_file: TextIO | BinaryIO,
    encoding: str | None = None,
    config: TextFixerConfig | None = None,
    **kwargs: Any,
) -> Iterator[str]:
    """
    Lazily fix every line of an open file, yielding the fixed lines in order.

    Lines that arrive as Unicode text are fixed as they are. Lines that arrive
    as bytes are decoded first: with `encoding` when one was supplied, and
    otherwise by handing the line to `guess_bytes`. Whatever encoding
    `guess_bytes` reports for the first undecoded line is then reused for the
    remaining lines, so the guess happens at most once per call. Guessing is
    best-effort; we make no promises about the result.

    If `config` is omitted, a default `TextFixerConfig` is used. Any extra
    keyword arguments are merged into the configuration via
    `_config_from_kwargs`.

    When `unescape_html` is set to "auto", it is switched off as soon as a line
    containing "<" is seen, and it stays off for the rest of the file.
    """
    base_config = TextFixerConfig() if config is None else config
    active_config = _config_from_kwargs(base_config, kwargs)

    for raw_line in input_file:
        if not isinstance(raw_line, bytes):
            # Already Unicode text: nothing to decode.
            text = raw_line
        elif encoding is not None:
            text = raw_line.decode(encoding)
        else:
            # No encoding known yet: guess it once, then remember the answer
            # so later lines take the `decode` branch above.
            text, encoding = guess_bytes(raw_line)

        if active_config.unescape_html == "auto" and "<" in text:
            # The replacement config is kept for all following lines.
            active_config = active_config._replace(unescape_html=False)

        fixed_text, _explanation = fix_and_explain(text, active_config)
        yield fixed_text
| 654 | |
| 655 | |
| 656 | def guess_bytes(bstring: bytes) -> tuple[str, str]: |
no test coverage detected