MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / setup_engine

Method setup_engine

tensorrt_llm/executor/base_worker.py:183–293  ·  view source on GitHub ↗

Set up the engine for the worker.

(self)

Source from the content-addressed store, hash-verified

181 return comm_ranks, device_ids
182
183 def setup_engine(self):
184 """
185 Setup the engine for the worker.
186 """
187
188 if isinstance(self._engine, list):
189 self._engine = self._engine[self.rank]
190
191 def _create_py_executor():  # Build and return the executor for self._backend ("pytorch" or "_autodeploy"); raises ValueError for any other backend.
192 args = {}  # keyword arguments forwarded to the selected factory via create_executor(**args)
193 assert hasattr(
194 self.llm_args, "backend"
195 ), "llm_args should be with backend in _create_py_executor"
196 _ = self._get_comm_ranks_device_id()  # return value is discarded; presumably called for its side effects -- TODO confirm
197 if self._backend == "pytorch":
198 from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
199 create_py_executor
200 create_executor = create_py_executor
201 args["llm_args"] = self.llm_args
202 args["checkpoint_dir"] = self._hf_model_dir
203 args["tokenizer"] = self._tokenizer
204 elif self._backend == "_autodeploy":
205 from tensorrt_llm._torch.auto_deploy.llm_args import \
206 LlmArgs as ADLlmArgs
207 from tensorrt_llm._torch.auto_deploy.shim.ad_executor import \
208 create_autodeploy_executor
209 create_executor = create_autodeploy_executor
210 assert isinstance(self.llm_args, ADLlmArgs)  # llm_args is passed below as ad_config, so it must be the auto_deploy LlmArgs type
211 args["ad_config"] = self.llm_args
212 args["tokenizer"] = self._tokenizer
213 else:
214 raise ValueError(f"Unsupported backend config: {self._backend}")
215
216 # Define additional attributes on self that can be used later, such as in _deduce_max_tokens
217 self.mapping = self.llm_args.parallel_config.to_mapping()
218 self.checkpoint_loader = None  # only replaced with a real loader for the "pytorch" backend
219 if self._backend == "pytorch":
220 from tensorrt_llm._torch.pyexecutor.model_loader import \
221 _construct_checkpoint_loader
222 self.checkpoint_loader = _construct_checkpoint_loader(
223 self.llm_args.backend, self.llm_args.checkpoint_loader,
224 self.llm_args.checkpoint_format)
225
226 self.max_seq_len = self.llm_args.max_seq_len  # initial value, read before the executor is created
227 # create_py_executor may change some fields of llm_args
228 _executor = create_executor(**args)
229 if _executor.max_seq_len is not None:
230 # max_seq_len might be updated by the model engine, as in create_py_executor; prefer the executor's value
231 self.max_seq_len = _executor.max_seq_len
232 return _executor
233
234 def _create_engine(executor_config):
235 engine = self._engine
236 if executor_config is None:
237 executor_config = tllm.ExecutorConfig(1)
238 executor_config.logits_post_processor_config = tllm.LogitsPostProcessorConfig(
239 processor_batched=self._batched_logits_processor,
240 replicate=False)

Callers

nothing calls this directly

Calls 6

LoraManagerClass · 0.85
get_lora_managerMethod · 0.80
from_json_fileMethod · 0.45
getMethod · 0.45

Tested by

no test coverage detected