| 137 | |
| 138 | class Block(nn.Layer): |
def __init__(
    self,
    dim,
    num_heads,
    mlp_ratio=4.0,
    qkv_bias=False,
    qk_scale=None,
    drop=0.0,
    attn_drop=0.0,
    drop_path=0.0,
    act_layer=nn.GELU,
    norm_layer="nn.LayerNorm",
    epsilon=1e-5,
):
    """Build the sub-layers of the block: norm1, attn, drop_path, norm2, mlp.

    Args:
        dim: Feature size handed to both normalization layers, to
            ``Attention`` and to ``Mlp`` (as ``in_features``).
        num_heads: Forwarded to ``Attention`` as ``num_heads``.
        mlp_ratio: The ``Mlp`` hidden size is ``int(dim * mlp_ratio)``.
        qkv_bias: Forwarded to ``Attention`` as ``qkv_bias``.
        qk_scale: Forwarded to ``Attention`` as ``qk_scale``.
        drop: Forwarded to ``Attention`` as ``proj_drop`` and to ``Mlp``
            as ``drop``.
        attn_drop: Forwarded to ``Attention`` as ``attn_drop``.
        drop_path: When greater than ``0.0`` a ``DropPath(drop_path)`` is
            used; otherwise ``Identity()``.
        act_layer: Forwarded to ``Mlp`` as ``act_layer``.
        norm_layer: Either a string that is evaluated to obtain the
            normalization factory (then called as
            ``factory(dim, epsilon=epsilon)``), or a callable (then called
            as ``factory(dim)``).
        epsilon: Passed to the normalization factory only when
            ``norm_layer`` is given as a string.

    Raises:
        TypeError: If ``norm_layer`` is neither a string nor a callable.
    """
    super().__init__()

    # Resolve the normalization factory once instead of repeating the same
    # str/callable dispatch for norm1 and norm2.
    if isinstance(norm_layer, str):
        # SECURITY NOTE(review): eval() executes arbitrary code. This is only
        # safe while `norm_layer` comes from trusted configuration; never
        # feed it untrusted input.
        norm_factory = eval(norm_layer)
        norm_kwargs = {"epsilon": epsilon}
    elif isinstance(norm_layer, Callable):
        # A user-supplied callable is invoked with `dim` only; `epsilon` is
        # deliberately not forwarded because its signature is unknown.
        norm_factory = norm_layer
        norm_kwargs = {}
    else:
        raise TypeError("The norm_layer must be str or paddle.nn.layer.Layer class")

    # Sub-layers are assigned in the original order (norm1, attn, drop_path,
    # norm2, mlp) so the attribute registration order is unchanged.
    self.norm1 = norm_factory(dim, **norm_kwargs)
    self.attn = Attention(
        dim,
        num_heads=num_heads,
        qkv_bias=qkv_bias,
        qk_scale=qk_scale,
        attn_drop=attn_drop,
        proj_drop=drop,
    )
    # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
    self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
    self.norm2 = norm_factory(dim, **norm_kwargs)
    mlp_hidden_dim = int(dim * mlp_ratio)
    self.mlp = Mlp(
        in_features=dim,
        hidden_features=mlp_hidden_dim,
        act_layer=act_layer,
        drop=drop,
    )
| 183 | |
| 184 | def forward(self, x): |
| 185 | x = x + self.drop_path(self.attn(self.norm1(x))) |