MCPcopy
hub / github.com/deepspeedai/DeepSpeed / BertSelfOutput

Class BertSelfOutput

tests/unit/modeling.py:364–381  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

362
363
class BertSelfOutput(nn.Module):
    """Dense projection, dropout, then LayerNorm over the sum with a second input.

    The projection's weight and bias are not left at their default
    initialisation: they are replaced by entry 3 of the ``weights`` and
    ``biases`` collections passed to the constructor.
    """

    def __init__(self, config, weights, biases):
        """Build the sub-modules and install the supplied projection parameters.

        Args:
            config: Object exposing ``hidden_size`` and ``hidden_dropout_prob``.
            weights: Indexable collection; ``weights[3]`` becomes ``dense.weight``.
            biases: Indexable collection; ``biases[3]`` becomes ``dense.bias``.
        """
        super().__init__()
        width = config.hidden_size
        # Square projection: input and output widths are both hidden_size.
        self.dense = nn.Linear(width, width)
        # NOTE(review): presumably these entries are nn.Parameter objects
        # (nn.Module refuses a plain tensor for a registered parameter name)
        # -- confirm against the caller.
        self.dense.weight, self.dense.bias = weights[3], biases[3]
        self.LayerNorm = BertLayerNorm(width, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        """Return ``LayerNorm(dropout(dense(hidden_states)) + input_tensor)``."""
        projected = self.dropout(self.dense(hidden_states))
        return self.LayerNorm(projected + input_tensor)

    def get_w(self):
        """Return the weight currently held by the dense projection."""
        return self.dense.weight
382
383
384class BertAttention(nn.Module):

Callers 1

__init__ · Method · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…