(imagenet_r, imagenetr_labels, model, preprocess, device)
| 77 | |
| 78 | |
def classify_imagenetr(imagenet_r, imagenetr_labels, model, preprocess, device):
    """Zero-shot classify every image in ``imagenet_r`` and report top-1 accuracy.

    Each class name is turned into the prompt ``"a picture of a {c}"`` and
    encoded with ``model.encode_text``. Every image is encoded with
    ``model.encode_image``; both embeddings are L2-normalised, so their dot
    product is a cosine similarity. The class with the highest similarity is
    the prediction.

    Args:
        imagenet_r: Object exposing ``imgs``, an iterable of ``(path, label)``
            pairs; ``path`` is handed to ``Image.open``. Presumably a
            torchvision ``ImageFolder`` -- confirm against the caller.
        imagenetr_labels: Class names. The position of a name in this
            sequence is the class index that is compared against ``label``.
        model: Model exposing ``encode_image`` and ``encode_text``.
        preprocess: Callable that maps an opened image to a tensor.
        device: Device the model inputs are moved to.

    Returns:
        A ``(res, acc)`` tuple. ``res`` is an array with one
        ``[predicted_index, label]`` row per image and ``acc`` is the fraction
        of rows whose two entries are equal. When there are no images,
        ``res`` has shape ``(0, 2)`` and ``acc`` is NaN (accuracy is undefined).
    """
    samples = list(imagenet_r.imgs)
    if not samples:
        # Nothing to score: avoid indexing into an empty 1-D array below.
        return np.empty((0, 2), dtype=np.int64), float("nan")

    res = []
    with torch.no_grad():
        # The prompts do not depend on the image, so tokenise and encode them
        # once instead of once per image.
        text_inputs = torch.cat(
            [clip.tokenize(f"a picture of a {c}") for c in imagenetr_labels]
        ).to(device)
        text_features = model.encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)

        for path, label in samples:
            # Close each file as soon as it has been turned into a tensor so
            # large datasets do not exhaust file descriptors.
            with Image.open(path) as image:
                image_input = preprocess(image).unsqueeze(0).to(device)

            image_features = model.encode_image(image_input)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
            _, indices = similarity[0].topk(1)
            res.append([indices[0].item(), label])

    res = np.array(res)
    acc = np.mean(res[:, 0] == res[:, 1])
    return res, acc
| 104 | |
| 105 | def perform_inference(model_index, data_index): |
| 106 | model_pretrain, dataset = CLIP_MODELS[model_index], DATA_FOLDER[data_index] |
no test coverage detected