(self, tokenizer)
class LLaVAPretrainCaptioningDataset(Dataset):
    """Dataset over the image-bearing records of a captioning JSON file.

    On construction the annotation file is parsed, every record that has
    an ``'image'`` key is kept in ``self.list_data_dict``, and a CLIP image
    processor is loaded into ``self.processor``.
    """

    # Defaults reproduce the locations/checkpoint that used to be hard-coded
    # in ``__init__``, so ``LLaVAPretrainCaptioningDataset(tokenizer)`` keeps
    # its previous behavior.
    DEFAULT_DATA_FILE_PATH = "/mnt/bn/vgfm2/test_dit/blip_laion_cc_sbu_558k.json"
    DEFAULT_IMAGE_ROOT = "/mnt/bn/vgfm2/test_dit/pretraining_data"
    DEFAULT_PROCESSOR_NAME = "openai/clip-vit-large-patch14-336"

    def __init__(
        self,
        tokenizer,
        data_file_path: str = DEFAULT_DATA_FILE_PATH,
        image_root: str = DEFAULT_IMAGE_ROOT,
        processor_name: str = DEFAULT_PROCESSOR_NAME,
    ) -> None:
        """Load the annotation file and the CLIP image processor.

        Args:
            tokenizer: Stored unchanged on ``self.tokenizer``; it is not
                used by the code in this block.
            data_file_path: Path to the JSON annotation file. The top-level
                JSON value is iterated, and each element is tested for an
                ``'image'`` key (presumably a list of dicts -- confirm
                against the data file).
            image_root: Stored on ``self.image_root``; presumably the
                directory that image paths in the records are relative to
                -- confirm against the code that reads images.
            processor_name: Identifier passed to
                ``CLIPImageProcessor.from_pretrained``.

        Raises:
            OSError: If ``data_file_path`` cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        super().__init__()

        self.tokenizer = tokenizer
        self.image_root = image_root

        self.list_data_dict = self._load_image_records(data_file_path)

        self.processor = CLIPImageProcessor.from_pretrained(processor_name)

        print("Formatting llava captioning data")

    @staticmethod
    def _load_image_records(data_file_path: str) -> list:
        """Return the records in the JSON file that contain an 'image' key."""
        # JSON text is UTF-8 by specification; do not depend on the locale's
        # default encoding.
        with open(data_file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return [item for item in data if "image" in item]

    def __len__(self) -> int:
        """Return the number of records kept from the annotation file."""
        return len(self.list_data_dict)
nothing calls this directly
no test coverage detected