hub / github.com/NVIDIA/TensorRT-LLM / prepare_inputs

Method prepare_inputs

tensorrt_llm/models/enc_dec/model.py:765–1019 · view source on GitHub ↗

@brief: Prepare input Tensors for the model. The given sizes are used to determine the ranges of the dimensions when using TRT dynamic shapes. @return: a list containing values which can be fed into self.forward()

(self,
                       max_batch_size,
                       max_input_len,
                       prompt_embedding_table_size: int = 0,
                       lora_target_modules: List[str] = None,
                       *args,
                       **kwargs)

Source from the content-addressed store, hash-verified

763	return hidden_states
764
765	def prepare_inputs(self,
766	max_batch_size,
767	max_input_len,
768	prompt_embedding_table_size: int = 0,
769	lora_target_modules: List[str] = None,
770	*args,
771	**kwargs):
772	'''@brief: Prepare inputs Tensors for the model, the given sizes are used to determine the
773	ranges of the dimensions of when using TRT dynamic shapes.
774
775	@return: a list contains values which can be fed into the self.forward()
776	'''
777
778	hidden_size = self.hidden_size
779
780	bs_range = [1, (max_batch_size + 1) // 2, max_batch_size]
781	inlen_range = [1, (max_input_len + 1) // 2, max_input_len]
782	num_tokens_range = [
783	1,
784	(max_input_len * max_batch_size + 1) // 2,
785	max_input_len * max_batch_size,
786	]
787
788	input_ids, position_ids, token_type_ids, hidden_states = None, None, None, None
789	remove_input_padding = default_net().plugin_config.remove_input_padding
790	use_lora_plugin = default_net().plugin_config.lora_plugin
791
792	attention_mask = None
793	if remove_input_padding:
794	if self.mapping.is_first_pp_rank():
795	input_ids = Tensor(
796	name="input_ids",
797	dtype=trt.int32,
798	shape=[-1],
799	dim_range=OrderedDict([("num_tokens", [num_tokens_range])]),
800	)
801	if self.has_position_embedding:
802	position_ids = Tensor(
803	name='position_ids',
804	dtype=trt.int32,
805	shape=[-1],
806	dim_range=OrderedDict([('num_tokens',
807	[num_tokens_range])]),
808	)
809	if self.has_token_type_embedding:
810	token_type_ids = Tensor(
811	name='token_type_ids',
812	dtype=trt.int32,
813	shape=[-1],
814	dim_range=OrderedDict([('num_tokens',
815	[num_tokens_range])]),
816	)
817	else:
818	hidden_states = Tensor(name='hidden_states_input',
819	dtype=self._dtype,
820	shape=[-1, hidden_size],
821	dim_range=OrderedDict([
822	('num_tokens', [num_tokens_range]),

Callers

nothing calls this directly

Calls 7

default_netFunction · 0.90

TensorClass · 0.90

LoraParamsClass · 0.90

pp_layersMethod · 0.80

is_first_pp_rankMethod · 0.45

appendMethod · 0.45

updateMethod · 0.45

Tested by

no test coverage detected