Set up the engine for the worker.
(self)
| 181 | return comm_ranks, device_ids |
| 182 | |
| 183 | def setup_engine(self): |
| 184 | """ |
| 185 | Set up the engine for the worker. |
| 186 | """ |
| 187 | |
| 188 | if isinstance(self._engine, list): |
| 189 | self._engine = self._engine[self.rank] |
| 190 | |
| 191 | def _create_py_executor(): |
| 192 | args = {} |
| 193 | assert hasattr( |
| 194 | self.llm_args, "backend" |
| 195 | ), "llm_args should be with backend in _create_py_executor" |
| 196 | _ = self._get_comm_ranks_device_id() |
| 197 | if self._backend == "pytorch": |
| 198 | from tensorrt_llm._torch.pyexecutor.py_executor_creator import \ |
| 199 | create_py_executor |
| 200 | create_executor = create_py_executor |
| 201 | args["llm_args"] = self.llm_args |
| 202 | args["checkpoint_dir"] = self._hf_model_dir |
| 203 | args["tokenizer"] = self._tokenizer |
| 204 | elif self._backend == "_autodeploy": |
| 205 | from tensorrt_llm._torch.auto_deploy.llm_args import \ |
| 206 | LlmArgs as ADLlmArgs |
| 207 | from tensorrt_llm._torch.auto_deploy.shim.ad_executor import \ |
| 208 | create_autodeploy_executor |
| 209 | create_executor = create_autodeploy_executor |
| 210 | assert isinstance(self.llm_args, ADLlmArgs) |
| 211 | args["ad_config"] = self.llm_args |
| 212 | args["tokenizer"] = self._tokenizer |
| 213 | else: |
| 214 | raise ValueError(f"Unsupported backend config: {self._backend}") |
| 215 | |
| 216 | # Define additional attributes that can be used later, such as in _deduce_max_tokens |
| 217 | self.mapping = self.llm_args.parallel_config.to_mapping() |
| 218 | self.checkpoint_loader = None |
| 219 | if self._backend == "pytorch": |
| 220 | from tensorrt_llm._torch.pyexecutor.model_loader import \ |
| 221 | _construct_checkpoint_loader |
| 222 | self.checkpoint_loader = _construct_checkpoint_loader( |
| 223 | self.llm_args.backend, self.llm_args.checkpoint_loader, |
| 224 | self.llm_args.checkpoint_format) |
| 225 | |
| 226 | self.max_seq_len = self.llm_args.max_seq_len |
| 227 | # create_py_executor may change some fields of llm_args |
| 228 | _executor = create_executor(**args) |
| 229 | if _executor.max_seq_len is not None: |
| 230 | # max_seq_len might be updated by model engine as in create_py_executor |
| 231 | self.max_seq_len = _executor.max_seq_len |
| 232 | return _executor |
| 233 | |
| 234 | def _create_engine(executor_config): |
| 235 | engine = self._engine |
| 236 | if executor_config is None: |
| 237 | executor_config = tllm.ExecutorConfig(1) |
| 238 | executor_config.logits_post_processor_config = tllm.LogitsPostProcessorConfig( |
| 239 | processor_batched=self._batched_logits_processor, |
| 240 | replicate=False) |
nothing calls this directly
no test coverage detected