MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / EncoderLayer

Class EncoderLayer

tensorrt_llm/models/enc_dec/model.py:181–342  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

179
180
181class EncoderLayer(Module):
182
183 def __init__(self,
184 hidden_size,
185 ffn_hidden_size,
186 num_attention_heads,
187 num_kv_heads,
188 head_size,
189 max_position_embeddings=None,
190 q_scaling=1.0,
191 has_attention_qkvo_bias=False,
192 has_mlp_bias=False,
193 layernorm_position=LayerNormPositionType.pre_layernorm,
194 layernorm_type=LayerNormType.LayerNorm,
195 layernorm_eps=1e-5,
196 hidden_act="relu",
197 mlp_type=MLPType.MLP,
198 mapping=Mapping(),
199 dtype=None,
200 residual_scaling=1.0,
201 relative_attention=False,
202 max_distance=0,
203 num_buckets=0,
204 fp16_clamping=False,
205 quant_mode=QuantMode(0),
206 language_adapter_config: LanguageAdapterConfig = None):
207 super().__init__()
208
209 # e.g. BART regular, T5 RMS
210 self.layernorm_type = layernorm_type
211 ln_type = layernorm_map[layernorm_type]
212
213 # e.g. BART post, T5 pre
214 self.layernorm_position = layernorm_position
215
216 # e.g. BART q_scaling = 1.f, T5 q_scaling = 1.f/sqrt(head_size)
217 self.attention = BertAttention(
218 hidden_size,
219 num_attention_heads,
220 attention_head_size=head_size,
221 num_kv_heads=num_kv_heads,
222 max_position_embeddings=max_position_embeddings,
223 q_scaling=q_scaling,
224 bias=has_attention_qkvo_bias,
225 tp_group=mapping.tp_group,
226 tp_size=mapping.tp_size,
227 tp_rank=mapping.tp_rank,
228 dtype=dtype,
229 relative_attention=relative_attention,
230 max_distance=max_distance,
231 num_buckets=num_buckets,
232 quant_mode=quant_mode)
233
234 self.attention_layernorm = ln_type(normalized_shape=hidden_size,
235 eps=layernorm_eps,
236 dtype=dtype)
237
238 # T5/BART MLP, Flan-T5 GatedMLP

Callers 2

__init__ · Method · 0.85
__init__ · Method · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected