(self,
model_path: str,
model_name: str | None = None,
backend: Literal['turbomind', 'pytorch'] = 'turbomind',
backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
chat_template_config: ChatTemplateConfig | None = None,
max_log_len: int | None = None,
trust_remote_code: bool = False,
speculative_config: SpeculativeConfig | None = None,
**kwargs)
| 105 | """ |
| 106 | |
| 107 | def __init__(self, |
| 108 | model_path: str, |
| 109 | model_name: str | None = None, |
| 110 | backend: Literal['turbomind', 'pytorch'] = 'turbomind', |
| 111 | backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None, |
| 112 | chat_template_config: ChatTemplateConfig | None = None, |
| 113 | max_log_len: int | None = None, |
| 114 | trust_remote_code: bool = False, |
| 115 | speculative_config: SpeculativeConfig | None = None, |
| 116 | **kwargs) -> None: |
| 117 | logger.info(f'input backend={backend}, backend_config={backend_config}') |
| 118 | logger.info(f'speculative_config={speculative_config}') |
| 119 | backend_config = backend_config or (TurbomindEngineConfig() |
| 120 | if backend == 'turbomind' else PytorchEngineConfig()) |
| 121 | self.model_name = model_name if model_name else model_path |
| 122 | self.chat_template = get_chat_template(model_path, chat_template_config, trust_remote_code=trust_remote_code) |
| 123 | self.tokenizer = Tokenizer(model_path, trust_remote_code=trust_remote_code) |
| 124 | self.prompt_processor = MultimodalProcessor(self.tokenizer, self.chat_template) |
| 125 | self.hf_gen_cfg = get_hf_gen_cfg(model_path, trust_remote_code=trust_remote_code) |
| 126 | self.arch, self.hf_cfg = get_model_arch(model_path, trust_remote_code=trust_remote_code) |
| 127 | self.session_len = (_get_and_verify_max_len(self.hf_cfg, None) |
| 128 | if backend_config.session_len is None else backend_config.session_len) |
| 129 | backend_config.session_len = self.session_len |
| 130 | if speculative_config is not None and backend == 'turbomind': |
| 131 | logger.warning('speculative decoding is not supported by turbomind ') |
| 132 | # build backend engine |
| 133 | if backend == 'turbomind': |
| 134 | self.engine = self._build_turbomind(model_path=model_path, |
| 135 | backend_config=backend_config, |
| 136 | trust_remote_code=trust_remote_code, |
| 137 | **kwargs) |
| 138 | elif backend == 'pytorch': |
| 139 | self.engine = self._build_pytorch(model_path=model_path, |
| 140 | backend_config=backend_config, |
| 141 | trust_remote_code=trust_remote_code, |
| 142 | speculative_config=speculative_config, |
| 143 | **kwargs) |
| 144 | else: |
| 145 | raise ValueError(f'unsupported backend {backend}') |
| 146 | self.backend_config = self.engine.engine_config |
| 147 | self.is_sleeping = backend_config.empty_init |
| 148 | self.sleeping_tags: set[str] = set() if not backend_config.empty_init else {'weights', 'kv_cache'} |
| 149 | logger.info(f'updated backend_config={self.backend_config}') |
| 150 | |
| 151 | # parameters for member functions |
| 152 | self.stop_words = _stop_words(self.chat_template.stop_words, self.tokenizer) |
| 153 | if self.stop_words is not None: |
| 154 | self.stop_words = self.stop_words[0][0].tolist() |
| 155 | self.backend = backend |
| 156 | self.request_logger = RequestLogger(max_log_len) |
| 157 | |
| 158 | self.num_spec_token = 0 if backend == 'turbomind' or speculative_config is None \ |
| 159 | else speculative_config.num_speculative_tokens |
| 160 | |
| 161 | self.session_mgr = SessionManager() |
| 162 | self.session_mgr.build_request_handle_pool(self.engine, self.backend_config.max_batch_size) |
| 163 | |
| 164 | # build stat loggers |
No direct call sites of this `__init__` method were detected.
No test coverage was detected for it.