MCPcopy
hub / github.com/NVlabs/RADIO / OpenAI_CLIP_VisionAdapter

Class OpenAI_CLIP_VisionAdapter

examples/common/model_loader.py:285–327  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

283
284
class OpenAI_CLIP_VisionAdapter(nn.Module):
    """Re-expose a vision tower so that it also returns per-patch tokens.

    The adapter borrows the layers and parameters of ``model`` by reference
    (nothing is copied) and runs its own forward pass, which returns both the
    class-token embedding and the remaining patch tokens.

    NOTE(review): going by the class name, ``model`` is presumably the
    ``visual`` tower of an OpenAI CLIP model -- confirm against the caller.
    """

    def __init__(self, model):
        """Take over the attributes of ``model`` that the forward pass needs.

        Args:
            model: Object providing ``input_resolution``, ``output_dim``,
                ``conv1``, ``class_embedding``, ``positional_embedding``,
                ``ln_pre``, ``transformer``, ``ln_post`` and ``proj``.
        """
        super().__init__()
        # Order matters: nn.Module registers parameters and submodules in
        # assignment order, so keep the same sequence as the wrapped layout.
        borrowed = (
            "input_resolution",
            "output_dim",
            "conv1",
            "class_embedding",
            "positional_embedding",
            "ln_pre",
            "transformer",
            "ln_post",
            "proj",
        )
        for attr in borrowed:
            setattr(self, attr, getattr(model, attr))

    @property
    def patch_size(self):
        """Return ``conv1.kernel_size`` unchanged.

        NOTE(review): for a ``torch.nn.Conv2d`` this is a tuple, not an int --
        confirm that callers expect that.
        """
        return self.conv1.kernel_size

    def forward(self, x: torch.Tensor):
        """Encode a batch of images.

        Args:
            x: Input tensor that is fed straight into ``conv1``.

        Returns:
            A ``(summary, feats)`` tuple. ``summary`` is the class token after
            ``ln_post`` (multiplied by ``proj`` when ``proj`` is not ``None``);
            ``feats`` holds every token after the first one, taken from the
            transformer output before ``ln_post`` is applied.
        """
        patches = self.conv1(x)  # shape = [*, width, grid, grid]
        batch, width = patches.shape[0], patches.shape[1]

        # Flatten the spatial grid and move it in front of the channel axis.
        tokens = patches.reshape(batch, width, -1)  # [*, width, grid ** 2]
        tokens = tokens.transpose(1, 2)  # [*, grid ** 2, width]

        # Adding the embedding to a zero tensor broadcasts it to one class
        # token per batch element.
        zeros = torch.zeros(
            batch, 1, width, dtype=tokens.dtype, device=tokens.device
        )
        cls_token = self.class_embedding.to(tokens.dtype) + zeros
        tokens = torch.cat([cls_token, tokens], dim=1)  # [*, grid ** 2 + 1, width]

        tokens = tokens + self.positional_embedding.to(tokens.dtype)
        tokens = self.ln_pre(tokens)

        # The transformer is fed sequence-first input: NLD -> LND and back.
        hidden = self.transformer(tokens.permute(1, 0, 2))
        hidden = hidden.permute(1, 0, 2)

        feats = hidden[:, 1:]
        summary = self.ln_post(hidden[:, 0, :])

        if self.proj is None:
            return summary, feats
        return summary @ self.proj, feats
328
329@dataclass
330class ModelInfo:

Callers 1

load_model · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…