Encodes a string into tokens, ignoring special tokens.

This is equivalent to `encode(text, disallowed_special=())` (but slightly faster).

```
>>> enc.encode_ordinary("hello world")
[31373, 995]
```
(self, text: str)
| 64 | # ==================== |
| 65 | |
def encode_ordinary(self, text: str) -> list[int]:
    """Tokenize ``text`` without giving special tokens any special treatment.

    Behaves like ``encode(text, disallowed_special=())`` but is slightly
    faster.

    Args:
        text: The string to tokenize.

    Returns:
        The token ids produced by the underlying core BPE encoder.

    Example:
        ```
        >>> enc.encode_ordinary("hello world")
        [31373, 995]
        ```
    """
    encode_with_core = self._core_bpe.encode_ordinary
    try:
        token_ids = encode_with_core(text)
    except UnicodeEncodeError:
        # Same workaround as in `encode` (see the comment there): round-trip
        # the text through UTF-16, letting surrogates through on the way out
        # ("surrogatepass") and substituting the replacement character for
        # anything undecodable on the way back ("replace"), then retry.
        utf16_payload = text.encode("utf-16", "surrogatepass")
        cleaned_text = utf16_payload.decode("utf-16", "replace")
        token_ids = encode_with_core(cleaned_text)
    return token_ids
| 81 | |
| 82 | def encode( |
| 83 | self, |