(self, dim, vocab_size)
| 305 | |
| 306 | class GPTOutput(nn.Module): |
def __init__(self, dim, vocab_size):
    """Create the output head's layers and initialise their parameters.

    Args:
        dim: Passed to ``Norm`` and as the first argument of ``Linear``;
            also stored on the instance as ``self.dim``.
        vocab_size: Passed as the second argument of ``Linear``
            (presumably the output width -- confirm against ``Linear``).
    """
    super().__init__()
    # Stored because reset_parameters() reads self.dim to compute its std.
    self.dim = dim
    # NOTE(review): Norm and Linear are defined outside this view; their
    # exact behaviour is not visible here.
    self.norm = Norm(dim)
    self.output = Linear(dim, vocab_size)

    # Runs after self.dim and both layers have been assigned.
    self.reset_parameters()
| 314 | |
| 315 | def reset_parameters(self): |
| 316 | std = 1.0 / math.sqrt(self.dim**2) |
nothing calls this directly
no test coverage detected