| 70 | |
| 71 | |
class Mlp(nn.Layer):
    """Feed-forward block: linear -> activation -> dropout -> linear -> dropout.

    Args:
        in_features: Input size of the first linear layer.
        hidden_features: Output size of the first linear layer and input size
            of the second. Any falsy value (``None``, ``0``) falls back to
            ``in_features``.
        out_features: Output size of the second linear layer. Any falsy value
            falls back to ``in_features``.
        act_layer: Callable invoked with no arguments to build the activation
            layer. Defaults to ``nn.GELU``.
        drop: Value passed to ``nn.Dropout``. A single dropout layer is built
            and applied at both dropout points in ``forward``.
    """

    def __init__(
        self,
        in_features,
        hidden_features=None,
        out_features=None,
        act_layer=nn.GELU,
        drop=0.0,
    ):
        super().__init__()
        hidden_dim = hidden_features if hidden_features else in_features
        out_dim = out_features if out_features else in_features

        # NOTE(review): creation order (fc1, act, fc2, drop) presumably fixes
        # the sub-layer registration order - keep it stable.
        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_dim, out_dim)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """Run ``x`` through fc1, the activation, dropout, fc2 and dropout again."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
| 96 | |
| 97 | |
| 98 | class Attention(nn.Layer): |