Tokenize a prompt. Args: s: a prompt. add_bos: Whether to keep a leading ``bos`` token id in the result; when False, a ``bos`` token id at the start of the encoded prompt is removed. add_special_tokens: Whether or not to add special tokens when encoding the prompt. Returns: list[int]: token ids.
(self, s: str, add_bos: bool = True, add_special_tokens: bool = True, **kwargs)
| 190 | return indexes |
| 191 | |
def encode(self, s: str, add_bos: bool = True, add_special_tokens: bool = True, **kwargs):
    """Convert a prompt string into token ids.

    Args:
        s: the prompt text to tokenize.
        add_bos: when False, a ``bos`` token id found at the start of
            the encoded result is removed; when True the result is
            returned exactly as the underlying model produced it.
        add_special_tokens: forwarded to the underlying model's
            ``encode`` call.
        **kwargs: extra keyword arguments forwarded unchanged to the
            underlying model's ``encode`` call.

    Returns:
        list[int]: token ids.
    """
    token_ids = self.model.encode(s, add_special_tokens=add_special_tokens, **kwargs)
    if add_bos:
        return token_ids
    # Continuing an existing session: drop a leading bos if one is present.
    has_leading_bos = len(token_ids) > 0 and token_ids[0] == self.bos_token_id
    return token_ids[1:] if has_leading_bos else token_ids
| 210 | |
| 211 | def decode(self, t: Sequence[int], offset: int | None = None, skip_special_tokens: bool = True): |
| 212 | """De-tokenize. |
no test coverage detected