Retrieves and instantiates a tokenizer. Takes a model_name as input, which is assumed to map to the Hugging Face repository name. This name is not necessarily the same as the name of the LLMWare model card, which is used to look up the model in model_configs; the model_name used here should be the hf_repo attribute on the model card.
(self, model_name, **kwargs)
| 12235 | return model |
| 12236 | |
def get_tokenizer(self, model_name, **kwargs):

    """ Retrieves and instantiates a tokenizer.

    The model_name is assumed to map to the Huggingface repository name. This name is not
    necessarily the same as the LLMWare model card name, which is used to look up the model in
    model_configs -> the model_name used here should be the hf_repo attribute on the model card.

    Side effect: stores model_name on self.model_name.

    Args:
        model_name: Huggingface repository name of the tokenizer to load.
        **kwargs: Forwarded only to the custom loader. The default AutoTokenizer path
            does not receive them.

    Returns:
        Whatever the custom loader returns when self.custom_loader is set; otherwise the
        result of AutoTokenizer.from_pretrained.

    Raises:
        DependencyNotInstalledException: if transformers cannot be imported (the original
            ImportError is attached as the cause).
    """

    self.model_name = model_name

    # A configured custom loader takes over entirely - transformers is never imported on this path.
    if self.custom_loader:
        return self.custom_loader.loader(self.model_name, self.api_key, self.trust_remote_code,
                                         self.custom_loader, caller="tokenizer", **kwargs)

    try:
        # imported lazily so that transformers stays an optional dependency
        from transformers import AutoTokenizer
    except ImportError as err:
        # chain the cause so the underlying import failure is visible in the traceback
        raise DependencyNotInstalledException("transformers") from err

    # Only send a token when an api_key is configured, matching the unauthenticated call otherwise.
    load_args = {"trust_remote_code": self.trust_remote_code}
    if self.api_key:
        load_args["token"] = self.api_key

    return AutoTokenizer.from_pretrained(model_name, **load_args)
| 12266 | |
| 12267 | |
| 12268 | class CustomPTLoader: |
no test coverage detected