Encodes a list of strings into tokens, in parallel, ignoring special tokens. This is equivalent to `encode_batch(text, disallowed_special=())` (but slightly faster). ``` >>> enc.encode_ordinary_batch(["hello world", "goodbye world"]) [[31373, 995], [11274, 16390, 995]] ```
(self, text: list[str], *, num_threads: int = 8)
| 162 | return np.frombuffer(buffer, dtype=np.uint32) |
| 163 | |
def encode_ordinary_batch(self, text: list[str], *, num_threads: int = 8) -> list[list[int]]:
    """Encodes a list of strings into tokens, in parallel, ignoring special tokens.

    This is equivalent to `encode_batch(text, disallowed_special=())` (but slightly faster).

    Each string is passed to `self.encode_ordinary` on a thread pool with
    `num_threads` workers. The returned list has one entry per input string,
    in the same order as `text`.

    ```
    >>> enc.encode_ordinary_batch(["hello world", "goodbye world"])
    [[31373, 995], [11274, 16390, 995]]
    ```
    """
    # The bound method is mapped directly: wrapping it in a `functools.partial`
    # that binds no arguments adds nothing.
    with ThreadPoolExecutor(num_threads) as pool:
        # `Executor.map` yields results in input order, regardless of which
        # worker finishes first.
        return list(pool.map(self.encode_ordinary, text))
| 177 | |
| 178 | def encode_batch( |
| 179 | self, |
no outgoing calls