Clean an input string by removing HTML escapes, control characters, and other unwanted characters.
(input: Any)
| 139 | # Refer to the utility functions of the official GraphRAG implementation: |
| 140 | # https://github.com/microsoft/graphrag |
def clean_str(input: Any) -> str:
    """Return a sanitized copy of a string, or the value itself if it is not a string.

    For ``str`` input, surrounding whitespace is stripped first, HTML
    entities are then decoded, and finally every character in the ranges
    ``\\x00-\\x1f`` and ``\\x7f-\\x9f`` is removed.

    Args:
        input: The value to clean. Only ``str`` instances are processed.

    Returns:
        The cleaned string. Non-string values are returned unchanged, so the
        result is not a ``str`` in that case despite the annotation.
    """
    if isinstance(input, str):
        # Strip before unescaping: whitespace produced by decoding an entity
        # is therefore not trimmed by this step.
        unescaped = html.unescape(input.strip())
        # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
        return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", unescaped)

    # Anything that is not a string is handed back untouched.
    return input
| 150 | |
| 151 | |
| 152 | # Utils types ----------------------------------------------------------------------- |
no outgoing calls
no test coverage detected