(self, model_dir: str, trust_remote_code: bool = False)
| 44 | """ |
| 45 | |
def __init__(self, model_dir: str, trust_remote_code: bool = False):
    """Load the tokenizer found in ``model_dir`` and set up decoding state.

    If the loaded tokenizer has no ``eos_token_id``, one is looked up in
    ``<model_dir>/generation_config.json`` and, failing that, taken from the
    tokenizer's ``eod_id`` attribute when it has one.

    Args:
        model_dir (str): Directory handed to
            ``AutoTokenizer.from_pretrained``; also searched for
            ``generation_config.json``.
        trust_remote_code (bool): Forwarded to the transformers version
            check and to ``AutoTokenizer.from_pretrained``.
    """
    self._check_transformers_version(model_dir, trust_remote_code=trust_remote_code)
    # Imported here, after the version check above has run.
    from transformers import AutoTokenizer
    self.logger = get_logger('lmdeploy')
    self.model = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
    self._prefix_space_tokens = None

    if self.model.eos_token_id is None:
        eos_token_id = None
        generation_config_file = osp.join(model_dir, 'generation_config.json')
        if osp.exists(generation_config_file):
            with open(generation_config_file, encoding='utf-8') as f:
                # `.get` instead of indexing: a config file without the key
                # must not abort construction with a KeyError.
                eos_token_id = json.load(f).get('eos_token_id')
        # Also reached when the config file exists but carries no usable id,
        # so the fallback is not skipped merely because the file is present.
        if eos_token_id is None and hasattr(self.model, 'eod_id'):  # Qwen remote
            eos_token_id = self.model.eod_id
        if eos_token_id is not None:
            self.model.eos_token_id = eos_token_id

    # for stop words
    self._vocab_size_with_added: int = None  # None until first computed
    self._maybe_decode_bytes: bool = None  # None until first computed
    # TODO maybe lack a constant.py
    self._indexes_tokens_deque = deque(maxlen=10)
    self.max_indexes_num = 5
    self.token2id = {}
| 69 | |
| 70 | def _check_transformers_version(self, model_dir: str, trust_remote_code: bool = False): |
| 71 | import transformers |
nothing calls this directly
no test coverage detected