Converts a byte string containing Python source code to a Unicode string. Unicode strings are passed through unchanged. Byte strings are checked for the Python source-file encoding cookie to determine their encoding. txt can be either a bytes buffer or a string containing the source code.
(txt: str | bytes | BytesIO, errors: str = 'replace', skip_encoding_cookie: bool = True)
# Matches a comment line that declares the source encoding, e.g.
# "# -*- coding: utf-8 -*-" or "# coding=latin-1".  Only leading
# whitespace may precede the "#"; group 1 captures the encoding name.
cookie_comment_re = re.compile(
    r"^\s*#.*coding[:=]\s*([-\w.]+)",
    flags=re.UNICODE,
)
| 18 | |
| 19 | def source_to_unicode(txt: str | bytes | BytesIO, errors: str = 'replace', skip_encoding_cookie: bool = True) -> str: |
| 20 | """Converts a bytes string with python source code to unicode. |
| 21 | |
| 22 | Unicode strings are passed through unchanged. Byte strings are checked |
| 23 | for the python source file encoding cookie to determine encoding. |
| 24 | txt can be either a bytes buffer or a string containing the source |
| 25 | code. |
| 26 | """ |
| 27 | if isinstance(txt, str): |
| 28 | return txt |
| 29 | if isinstance(txt, bytes): |
| 30 | buffer = BytesIO(txt) |
| 31 | else: |
| 32 | buffer = txt |
| 33 | try: |
| 34 | encoding, _ = detect_encoding(buffer.readline) |
| 35 | except SyntaxError: |
| 36 | encoding = "ascii" |
| 37 | buffer.seek(0) |
| 38 | with TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) as text: |
| 39 | text.mode = 'r' |
| 40 | if skip_encoding_cookie: |
| 41 | return u"".join(strip_encoding_cookie(text)) |
| 42 | else: |
| 43 | return text.read() |
| 44 | |
| 45 | def strip_encoding_cookie(filelike: Iterable[str]) -> Generator[str, None, None]: |
| 46 | """Generator to pull lines from a text-mode file, skipping the encoding |
no test coverage detected
searching dependent graphs…