hub / github.com/NVIDIA/TensorRT-LLM / setup_engine

Method setup_engine

tensorrt_llm/executor/base_worker.py:183–293 · view source on GitHub ↗

Set up the engine for the worker.

(self)

Source from the content-addressed store, hash-verified

181	return comm_ranks, device_ids
182
183	def setup_engine(self):
184	"""
185	Set up the engine for the worker.
186	"""
187
188	if isinstance(self._engine, list):
189	self._engine = self._engine[self.rank]
190
191	def _create_py_executor():
192	args = {}
193	assert hasattr(
194	self.llm_args, "backend"
195	), "llm_args should be with backend in _create_py_executor"
196	_ = self._get_comm_ranks_device_id()
197	if self._backend == "pytorch":
198	from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
199	create_py_executor
200	create_executor = create_py_executor
201	args["llm_args"] = self.llm_args
202	args["checkpoint_dir"] = self._hf_model_dir
203	args["tokenizer"] = self._tokenizer
204	elif self._backend == "_autodeploy":
205	from tensorrt_llm._torch.auto_deploy.llm_args import \
206	LlmArgs as ADLlmArgs
207	from tensorrt_llm._torch.auto_deploy.shim.ad_executor import \
208	create_autodeploy_executor
209	create_executor = create_autodeploy_executor
210	assert isinstance(self.llm_args, ADLlmArgs)
211	args["ad_config"] = self.llm_args
212	args["tokenizer"] = self._tokenizer
213	else:
214	raise ValueError(f"Unsupported backend config: {self._backend}")
215
216	# Define additional attributes that can be used later, such as in _deduce_max_tokens
217	self.mapping = self.llm_args.parallel_config.to_mapping()
218	self.checkpoint_loader = None
219	if self._backend == "pytorch":
220	from tensorrt_llm._torch.pyexecutor.model_loader import \
221	_construct_checkpoint_loader
222	self.checkpoint_loader = _construct_checkpoint_loader(
223	self.llm_args.backend, self.llm_args.checkpoint_loader,
224	self.llm_args.checkpoint_format)
225
226	self.max_seq_len = self.llm_args.max_seq_len
227	# create_py_executor may change some fields of llm_args
228	_executor = create_executor(**args)
229	if _executor.max_seq_len is not None:
230	# max_seq_len might be updated by model engine as in create_py_executor
231	self.max_seq_len = _executor.max_seq_len
232	return _executor
233
234	def _create_engine(executor_config):
235	engine = self._engine
236	if executor_config is None:
237	executor_config = tllm.ExecutorConfig(1)
238	executor_config.logits_post_processor_config = tllm.LogitsPostProcessorConfig(
239	processor_batched=self._batched_logits_processor,
240	replicate=False)

Callers

nothing calls this directly

Calls 6

_engine_config_to_model_configFunction · 0.85

LoraManagerClass · 0.85

PromptAdapterManagerClass · 0.85

get_lora_managerMethod · 0.80

from_json_fileMethod · 0.45

getMethod · 0.45

Tested by

no test coverage detected