Encode input text into a list of token IDs. Args: text (str): The input text string to tokenize. Returns: list[int]: List of integer token IDs representing the input text.
(self, text: str)
| 40 | return self.tokenizer.vocab_size |
| 41 | |
def encode(self, text: str) -> list[int]:
    """
    Tokenize a string into its sequence of integer token IDs.

    The work is delegated to the wrapped ``self.tokenizer``, which is
    called with ``add_special_tokens=False``.

    Args:
        text (str): Text to convert into token IDs.

    Returns:
        list[int]: Token IDs the wrapped tokenizer produced for ``text``.
    """
    token_ids = self.tokenizer.encode(text, add_special_tokens=False)
    return token_ids
| 53 | |
| 54 | def decode(self, token_ids: list[int]) -> str: |
| 55 | """ |
no outgoing calls