MCPcopy
hub / github.com/InternLM/lmdeploy / __init__

Method __init__

lmdeploy/tokenizer.py:46–68  ·  view source on GitHub ↗
(self, model_dir: str, trust_remote_code: bool = False)

Source from the content-addressed store, hash-verified

44 """
45
def __init__(self, model_dir: str, trust_remote_code: bool = False):
    """Load the HuggingFace tokenizer found in ``model_dir``.

    When the loaded tokenizer has no ``eos_token_id``, one is looked up in
    ``generation_config.json`` inside ``model_dir`` and, failing that, in
    the tokenizer's ``eod_id`` attribute (if it has one).

    Args:
        model_dir (str): Location passed to
            ``AutoTokenizer.from_pretrained``; it is also joined with
            ``generation_config.json`` for the eos fallback lookup.
        trust_remote_code (bool): Forwarded to both the transformers
            version check and ``AutoTokenizer.from_pretrained``.
    """
    self._check_transformers_version(model_dir, trust_remote_code=trust_remote_code)
    # Imported lazily, after the version check above has run.
    from transformers import AutoTokenizer
    self.logger = get_logger('lmdeploy')
    self.model = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
    self._prefix_space_tokens = None

    if self.model.eos_token_id is None:
        eos_token_id = None
        generation_config_file = osp.join(model_dir, 'generation_config.json')
        if osp.exists(generation_config_file):
            # JSON is UTF-8; do not depend on the platform default encoding.
            with open(generation_config_file, encoding='utf-8') as f:
                cfg = json.load(f)
            # The key is optional: a config without it must not abort
            # tokenizer construction with a KeyError.
            eos_token_id = cfg.get('eos_token_id')
        # Fall back to `eod_id` whenever the config file gave no usable id,
        # not only when the file is absent.
        if eos_token_id is None and hasattr(self.model, 'eod_id'):  # Qwen remote
            eos_token_id = self.model.eod_id
        if eos_token_id is not None:
            self.model.eos_token_id = eos_token_id

    # for stop words
    self._vocab_size_with_added: int = None
    self._maybe_decode_bytes: bool = None
    # TODO maybe lack a constant.py
    self._indexes_tokens_deque = deque(maxlen=10)
    self.max_indexes_num = 5
    self.token2id = {}
69
70 def _check_transformers_version(self, model_dir: str, trust_remote_code: bool = False):
71 import transformers

Callers

nothing calls this directly

Calls 5

get_loggerFunction · 0.90
joinMethod · 0.80
from_pretrainedMethod · 0.45
loadMethod · 0.45

Tested by

no test coverage detected