MCPcopy
hub / github.com/InternLM/lmdeploy / __init__

Method __init__

lmdeploy/serve/core/async_engine.py:107–171  ·  view source on GitHub ↗
(self,
                 model_path: str,
                 model_name: str | None = None,
                 backend: Literal['turbomind', 'pytorch'] = 'turbomind',
                 backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
                 chat_template_config: ChatTemplateConfig | None = None,
                 max_log_len: int | None = None,
                 trust_remote_code: bool = False,
                 speculative_config: SpeculativeConfig | None = None,
                 **kwargs)

Source from the content-addressed store, hash-verified

105 """
106
107 def __init__(self,
108 model_path: str,
109 model_name: str | None = None,
110 backend: Literal['turbomind', 'pytorch'] = 'turbomind',
111 backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
112 chat_template_config: ChatTemplateConfig | None = None,
113 max_log_len: int | None = None,
114 trust_remote_code: bool = False,
115 speculative_config: SpeculativeConfig | None = None,
116 **kwargs) -> None:
117 logger.info(f'input backend={backend}, backend_config={backend_config}')
118 logger.info(f'speculative_config={speculative_config}')
119 backend_config = backend_config or (TurbomindEngineConfig()
120 if backend == 'turbomind' else PytorchEngineConfig())
121 self.model_name = model_name if model_name else model_path
122 self.chat_template = get_chat_template(model_path, chat_template_config, trust_remote_code=trust_remote_code)
123 self.tokenizer = Tokenizer(model_path, trust_remote_code=trust_remote_code)
124 self.prompt_processor = MultimodalProcessor(self.tokenizer, self.chat_template)
125 self.hf_gen_cfg = get_hf_gen_cfg(model_path, trust_remote_code=trust_remote_code)
126 self.arch, self.hf_cfg = get_model_arch(model_path, trust_remote_code=trust_remote_code)
127 self.session_len = (_get_and_verify_max_len(self.hf_cfg, None)
128 if backend_config.session_len is None else backend_config.session_len)
129 backend_config.session_len = self.session_len
130 if speculative_config is not None and backend == 'turbomind':
131 logger.warning('speculative decoding is not supported by turbomind ')
132 # build backend engine
133 if backend == 'turbomind':
134 self.engine = self._build_turbomind(model_path=model_path,
135 backend_config=backend_config,
136 trust_remote_code=trust_remote_code,
137 **kwargs)
138 elif backend == 'pytorch':
139 self.engine = self._build_pytorch(model_path=model_path,
140 backend_config=backend_config,
141 trust_remote_code=trust_remote_code,
142 speculative_config=speculative_config,
143 **kwargs)
144 else:
145 raise ValueError(f'unsupported backend {backend}')
146 self.backend_config = self.engine.engine_config
147 self.is_sleeping = backend_config.empty_init
148 self.sleeping_tags: set[str] = set() if not backend_config.empty_init else {'weights', 'kv_cache'}
149 logger.info(f'updated backend_config={self.backend_config}')
150
151 # parameters for member functions
152 self.stop_words = _stop_words(self.chat_template.stop_words, self.tokenizer)
153 if self.stop_words is not None:
154 self.stop_words = self.stop_words[0][0].tolist()
155 self.backend = backend
156 self.request_logger = RequestLogger(max_log_len)
157
158 self.num_spec_token = 0 if backend == 'turbomind' or speculative_config is None \
159 else speculative_config.num_speculative_tokens
160
161 self.session_mgr = SessionManager()
162 self.session_mgr.build_request_handle_pool(self.engine, self.backend_config.max_batch_size)
163
164 # build stat loggers

Callers

nothing calls this directly

Calls 15

_build_turbomind · Method · 0.95
_build_pytorch · Method · 0.95
_build_stat_loggers · Method · 0.95
PytorchEngineConfig · Class · 0.90
get_chat_template · Function · 0.90
Tokenizer · Class · 0.90
MultimodalProcessor · Class · 0.90
get_hf_gen_cfg · Function · 0.90
get_model_arch · Function · 0.90
_get_and_verify_max_len · Function · 0.90
_stop_words · Function · 0.90

Tested by

no test coverage detected