MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / forward

Method forward

tensorrt_llm/models/enc_dec/model.py:716–763  ·  view source on GitHub ↗
(self,
                input_ids: Tensor,
                input_lengths=None,
                position_ids=None,
                token_type_ids=None,
                hidden_states=None,
                max_input_length=None,
                prompt_embedding_table=None,
                prompt_tasks=None,
                prompt_vocab_size=None,
                attention_mask=None,
                lora_params: LoraParams = None,
                language_adapter_routings: Optional[Tensor] = None)

Source from the content-addressed store, hash-verified

714 config.set_if_not_exist('residual_scaling', 1.0)
715
def forward(self,
            input_ids: Tensor,
            input_lengths=None,
            position_ids=None,
            token_type_ids=None,
            hidden_states=None,
            max_input_length=None,
            prompt_embedding_table=None,
            prompt_tasks=None,
            prompt_vocab_size=None,
            attention_mask=None,
            lora_params: LoraParams = None,
            language_adapter_routings: Optional[Tensor] = None):
    """Run this pipeline-parallel (PP) rank's share of the encoder.

    The first PP rank builds its hidden states from ``input_ids`` via the
    embedding; every other rank receives them from the previous rank. The
    hidden states are then passed through each layer in
    ``self.transformer.layers``. The last PP rank optionally applies the
    final layernorm and marks the result as ``'encoder_output'``; any
    other rank sends the result to the next rank and marks it as
    ``'hidden_states_output'``.

    Args:
        input_ids: Passed to the embedding on the first PP rank.
        input_lengths: Forwarded unchanged to every encoder layer.
        position_ids: Passed to the embedding on the first PP rank.
        token_type_ids: Passed to the embedding on the first PP rank.
        hidden_states: Only read on non-first PP ranks, where it is the
            tensor handed to ``recv``.
        max_input_length: Forwarded unchanged to every encoder layer.
        prompt_embedding_table: When not ``None``, it is passed to the
            embedding together with ``prompt_tasks`` and
            ``prompt_vocab_size`` as extra positional arguments.
        prompt_tasks: See ``prompt_embedding_table``.
        prompt_vocab_size: See ``prompt_embedding_table``.
        attention_mask: Forwarded unchanged to every encoder layer.
        lora_params: When not ``None`` and its ``lora_ranks`` is not
            ``None``, per-layer parameters are looked up with
            ``get_layer_params(layer_idx)`` and handed to each layer.
        language_adapter_routings: Forwarded unchanged to every encoder
            layer.

    Returns:
        The hidden states produced by this rank, after being marked as a
        network output.
    """
    # In PP, only the first rank consumes ids; every other rank consumes
    # the hidden states produced by the rank before it.
    if self.mapping.is_first_pp_rank():
        if prompt_embedding_table is not None:
            ptuning_args = (prompt_embedding_table, prompt_tasks,
                            prompt_vocab_size)
        else:
            ptuning_args = ()

        hidden_states = self.transformer.embedding(input_ids, position_ids,
                                                   token_type_ids,
                                                   *ptuning_args)
        self.register_network_output('embedding_layer_output',
                                     hidden_states)
    else:
        hidden_states = recv(hidden_states, self.mapping.prev_pp_rank())

    for layer_idx, encoder_layer in enumerate(self.transformer.layers):
        # Per-layer LoRA parameters exist only when LoRA ranks were given.
        if lora_params is None or lora_params.lora_ranks is None:
            lora_layer_params = None
        else:
            lora_layer_params = lora_params.get_layer_params(layer_idx)

        hidden_states = encoder_layer(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            input_lengths=input_lengths,
            max_input_length=max_input_length,
            lora_layer_params=lora_layer_params,
            language_adapter_routings=language_adapter_routings)

    # Intermediate ranks hand their result to the next rank and stop here.
    if not self.mapping.is_last_pp_rank():
        hidden_states = send(hidden_states, self.mapping.next_pp_rank())
        hidden_states.mark_output('hidden_states_output', self._dtype)
        return hidden_states

    # Last rank: optional final layernorm, then expose the encoder output.
    if self.has_model_final_layernorm:
        hidden_states = self.transformer.ln_f(hidden_states)
    hidden_states.mark_output('encoder_output', self._dtype)
    return hidden_states
764
765 def prepare_inputs(self,
766 max_batch_size,

Callers

nothing calls this directly

Calls 10

recv — Function · 0.90
send — Function · 0.90
embedding — Method · 0.80
get_layer_params — Method · 0.80
mark_output — Method · 0.80
is_first_pp_rank — Method · 0.45
prev_pp_rank — Method · 0.45
is_last_pp_rank — Method · 0.45
next_pp_rank — Method · 0.45

Tested by

no test coverage detected