hub / github.com/InternLM/lmdeploy / __init__

Method `__init__`

lmdeploy/serve/core/async_engine.py:107–171 · view source on GitHub ↗

(self,
                 model_path: str,
                 model_name: str | None = None,
                 backend: Literal['turbomind', 'pytorch'] = 'turbomind',
                 backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
                 chat_template_config: ChatTemplateConfig | None = None,
                 max_log_len: int | None = None,
                 trust_remote_code: bool = False,
                 speculative_config: SpeculativeConfig | None = None,
                 **kwargs)

Source from the content-addressed store, hash-verified

105	"""
106
107	def __init__(self,
108	model_path: str,
109	model_name: str \| None = None,
110	backend: Literal['turbomind', 'pytorch'] = 'turbomind',
111	backend_config: TurbomindEngineConfig \| PytorchEngineConfig \| None = None,
112	chat_template_config: ChatTemplateConfig \| None = None,
113	max_log_len: int \| None = None,
114	trust_remote_code: bool = False,
115	speculative_config: SpeculativeConfig \| None = None,
116	**kwargs) -> None:
117	logger.info(f'input backend={backend}, backend_config={backend_config}')
118	logger.info(f'speculative_config={speculative_config}')
119	backend_config = backend_config or (TurbomindEngineConfig()
120	if backend == 'turbomind' else PytorchEngineConfig())
121	self.model_name = model_name if model_name else model_path
122	self.chat_template = get_chat_template(model_path, chat_template_config, trust_remote_code=trust_remote_code)
123	self.tokenizer = Tokenizer(model_path, trust_remote_code=trust_remote_code)
124	self.prompt_processor = MultimodalProcessor(self.tokenizer, self.chat_template)
125	self.hf_gen_cfg = get_hf_gen_cfg(model_path, trust_remote_code=trust_remote_code)
126	self.arch, self.hf_cfg = get_model_arch(model_path, trust_remote_code=trust_remote_code)
127	self.session_len = (_get_and_verify_max_len(self.hf_cfg, None)
128	if backend_config.session_len is None else backend_config.session_len)
129	backend_config.session_len = self.session_len
130	if speculative_config is not None and backend == 'turbomind':
131	logger.warning('speculative decoding is not supported by turbomind ')
132	# build backend engine
133	if backend == 'turbomind':
134	self.engine = self._build_turbomind(model_path=model_path,
135	backend_config=backend_config,
136	trust_remote_code=trust_remote_code,
137	**kwargs)
138	elif backend == 'pytorch':
139	self.engine = self._build_pytorch(model_path=model_path,
140	backend_config=backend_config,
141	trust_remote_code=trust_remote_code,
142	speculative_config=speculative_config,
143	**kwargs)
144	else:
145	raise ValueError(f'unsupported backend {backend}')
146	self.backend_config = self.engine.engine_config
147	self.is_sleeping = backend_config.empty_init
148	self.sleeping_tags: set[str] = set() if not backend_config.empty_init else {'weights', 'kv_cache'}
149	logger.info(f'updated backend_config={self.backend_config}')
150
151	# parameters for member functions
152	self.stop_words = _stop_words(self.chat_template.stop_words, self.tokenizer)
153	if self.stop_words is not None:
154	self.stop_words = self.stop_words[0][0].tolist()
155	self.backend = backend
156	self.request_logger = RequestLogger(max_log_len)
157
158	self.num_spec_token = 0 if backend == 'turbomind' or speculative_config is None \
159	else speculative_config.num_speculative_tokens
160
161	self.session_mgr = SessionManager()
162	self.session_mgr.build_request_handle_pool(self.engine, self.backend_config.max_batch_size)
163
164	# build stat loggers

Callers

nothing calls this directly

Calls 15

_build_turbomind · Method · 0.95

_build_pytorch · Method · 0.95

_build_stat_loggers · Method · 0.95

TurbomindEngineConfig · Class · 0.90

PytorchEngineConfig · Class · 0.90

get_chat_template · Function · 0.90

Tokenizer · Class · 0.90

MultimodalProcessor · Class · 0.90

get_hf_gen_cfg · Function · 0.90

get_model_arch · Function · 0.90

_get_and_verify_max_len · Function · 0.90

_stop_words · Function · 0.90

Tested by

no test coverage detected

Method __init__

Source from the content-addressed store, hash-verified

Callers

Calls 15

Tested by

Method `__init__`