Image to Patch Embedding
| 279 | |
| 280 | |
| 281 | class PatchEmbed(nn.Layer): |
| 282 | """Image to Patch Embedding""" |
| 283 | |
| 284 | def __init__( |
| 285 | self, |
| 286 | img_size=[32, 100], |
| 287 | in_channels=3, |
| 288 | embed_dim=768, |
| 289 | sub_num=2, |
| 290 | patch_size=[4, 4], |
| 291 | mode="pope", |
| 292 | ): |
| 293 | super().__init__() |
| 294 | num_patches = (img_size[1] // (2**sub_num)) * (img_size[0] // (2**sub_num)) |
| 295 | self.img_size = img_size |
| 296 | self.num_patches = num_patches |
| 297 | self.embed_dim = embed_dim |
| 298 | self.norm = None |
| 299 | if mode == "pope": |
| 300 | if sub_num == 2: |
| 301 | self.proj = nn.Sequential( |
| 302 | ConvBNLayer( |
| 303 | in_channels=in_channels, |
| 304 | out_channels=embed_dim // 2, |
| 305 | kernel_size=3, |
| 306 | stride=2, |
| 307 | padding=1, |
| 308 | act=nn.GELU, |
| 309 | bias_attr=None, |
| 310 | ), |
| 311 | ConvBNLayer( |
| 312 | in_channels=embed_dim // 2, |
| 313 | out_channels=embed_dim, |
| 314 | kernel_size=3, |
| 315 | stride=2, |
| 316 | padding=1, |
| 317 | act=nn.GELU, |
| 318 | bias_attr=None, |
| 319 | ), |
| 320 | ) |
| 321 | if sub_num == 3: |
| 322 | self.proj = nn.Sequential( |
| 323 | ConvBNLayer( |
| 324 | in_channels=in_channels, |
| 325 | out_channels=embed_dim // 4, |
| 326 | kernel_size=3, |
| 327 | stride=2, |
| 328 | padding=1, |
| 329 | act=nn.GELU, |
| 330 | bias_attr=None, |
| 331 | ), |
| 332 | ConvBNLayer( |
| 333 | in_channels=embed_dim // 4, |
| 334 | out_channels=embed_dim // 2, |
| 335 | kernel_size=3, |
| 336 | stride=2, |
| 337 | padding=1, |
| 338 | act=nn.GELU, |