MCPcopy Index your code
hub / github.com/AstrBotDevs/AstrBot / detect_text_encoding

Function detect_text_encoding

astrbot/core/computer/file_read_utils.py:170–204  ·  view source on GitHub ↗
(sample: bytes)

Source from the content-addressed store, hash-verified

168
169
def detect_text_encoding(sample: bytes) -> str | None:
    """Pick the first candidate encoding under which ``sample`` reads as text.

    Args:
        sample: Raw bytes to classify (callers pass a probe of a file; it may
            be cut off at an arbitrary byte offset).

    Returns:
        ``"utf-8"`` for an empty sample; otherwise the first entry of
        ``_TEXT_ENCODINGS`` whose decoded output satisfies
        ``_looks_like_text``; ``None`` when no candidate qualifies or the
        NUL-byte pre-check rejects the sample.
    """
    if not sample:
        return "utf-8"

    # NUL bytes without a leading BOM are only tolerated when they dominate
    # one byte lane (even or odd offsets) -- presumably the footprint of
    # BOM-less UTF-16-style text; anything else is rejected outright.
    if b"\x00" in sample and not sample.startswith(_UTF_BOMS):
        nul_shares = [
            lane.count(0) / max(len(lane), 1)
            for lane in (sample[1::2], sample[0::2])
        ]
        if all(share < 0.8 for share in nul_shares):
            return None

    total = len(sample)
    for candidate in _TEXT_ENCODINGS:
        try:
            text = sample.decode(candidate)
        except UnicodeDecodeError as err:
            # A failure that starts before the last four bytes is a genuine
            # mismatch for this candidate.
            if err.start < total - 4:
                continue

            # Probe samples can stop in the middle of a multibyte sequence.
            # Drop one to four trailing bytes and retry so that such text is
            # not misclassified as binary.
            text = ""
            for cut in range(1, min(4, total) + 1):
                try:
                    text = sample[:-cut].decode(candidate)
                except UnicodeDecodeError:
                    continue
                break

            # Nothing usable survived trimming; move on to the next candidate.
            if not text:
                continue

        if _looks_like_text(text):
            return candidate

    return None
205
206
207def read_local_text_range_sync(

Callers 2

_run (Method) · 0.90
_probe_file (Function) · 0.85

Calls 3

_looks_like_text (Function) · 0.85
startswith (Method) · 0.80
decode (Method) · 0.80

Tested by

no test coverage detected