(self, ids, mask=None)
| 301 | self.apply(init_weights) |
| 302 | |
def forward(self, ids, mask=None):
    """Embed ``ids``, run them through every block, and normalize the result.

    Args:
        ids: Input passed directly to ``self.token_embedding``
            (presumably a batch of token indices -- confirm with callers).
        mask: Optional value forwarded unchanged to each block as its
            second positional argument.

    Returns:
        The output of the last block after ``self.norm`` followed by
        ``self.dropout``.
    """
    hidden = self.dropout(self.token_embedding(ids))

    # When the position embedding is shared, build it once here and hand
    # the same object to every block; otherwise blocks receive ``None``.
    # Both arguments are the size of dimension 1 of the embedded input
    # (assumed to be the sequence length -- TODO confirm).
    if self.shared_pos:
        pos_bias = self.pos_embedding(hidden.size(1), hidden.size(1))
    else:
        pos_bias = None

    for layer in self.blocks:
        hidden = layer(hidden, mask, pos_bias=pos_bias)

    return self.dropout(self.norm(hidden))
| 313 | |
| 314 | |
| 315 | class T5Decoder(nn.Module): |