Fix text as a single segment, returning the fixed text and an explanation of what was fixed. The explanation is a list of steps that can be applied with :func:`apply_plan`, or if config.explain is False, it will be None.
(
text: str, config: TextFixerConfig | None = None, **kwargs: Any
)
| 362 | |
| 363 | |
def fix_and_explain(
    text: str, config: TextFixerConfig | None = None, **kwargs: Any
) -> ExplainedText:
    """
    Fix text as a single segment, returning the fixed text and an explanation
    of what was fixed.

    The explanation is a list of steps that can be applied with
    :func:`apply_plan`, or if config.explain is False, it will be None.

    The whole sequence of fixes is repeated until one full pass leaves the
    text unchanged.

    :param text: The string to fix.
    :param config: The configuration to use. When None, a default
        ``TextFixerConfig()`` is created.
    :param kwargs: Extra options combined with ``config`` by
        ``_config_from_kwargs``.
    :returns: An ``ExplainedText`` built from the fixed text and the list of
        explanation steps (or None when explanations are disabled).
    :raises UnicodeError: If ``text`` is a ``bytes`` object.
    """
    if config is None:
        config = TextFixerConfig()
    if isinstance(text, bytes):
        raise UnicodeError(BYTES_ERROR_TEXT)
    config = _config_from_kwargs(config, kwargs)

    # In "auto" mode, HTML unescaping is switched off for any text that
    # contains a literal "<" (presumably because it may be real markup).
    if config.unescape_html == "auto" and "<" in text:
        config = config._replace(unescape_html=False)

    # If explanations aren't desired, `steps` will be None
    steps: list[ExplanationStep] | None = [] if config.explain else None

    while True:
        origtext = text

        text = _try_fix("unescape_html", text, config, steps)

        if config.fix_encoding:
            # Always hand the caller's config to the encoding fixer, so the
            # resulting text is the same whether or not an explanation was
            # requested. The returned steps are only kept when explaining.
            text, encoding_steps = fix_encoding_and_explain(text, config)
            if steps is not None and encoding_steps is not None:
                steps.extend(encoding_steps)

        # The remaining fixers run in this fixed order on every pass.
        for fixer in (
            "fix_c1_controls",
            "fix_latin_ligatures",
            "fix_character_width",
            "uncurl_quotes",
            "fix_line_breaks",
            "fix_surrogates",
            "remove_terminal_escapes",
            "remove_control_chars",
        ):
            text = _try_fix(fixer, text, config, steps)

        if config.normalization is not None:
            fixed = unicodedata.normalize(config.normalization, text)
            # Only record a step when normalization actually changed the text.
            if steps is not None and fixed != text:
                steps.append(ExplanationStep("normalize", config.normalization))
            text = fixed

        # Stop once a full pass leaves the text unchanged.
        if text == origtext:
            return ExplainedText(text, steps)