(self, tokenizer)
| 131 | class LLaVAInstructDataset(Dataset): |
| 132 | |
def __init__(
    self,
    tokenizer,
    *,
    data_file_path="/mnt/bn/vgfm2/test_dit/llava_v1_5_mix665k.json",
    image_root="/mnt/bn/vgfm2/test_dit/tuning_data",
    processor_name="openai/clip-vit-large-patch14-336",
):
    """Load the instruction JSON and keep only the records that have an image.

    Args:
        tokenizer: Stored unchanged on ``self.tokenizer``; it is not used
            inside this constructor.
        data_file_path: Path of the JSON file to load. The top-level JSON
            value is iterated, and every entry containing an ``'image'``
            key is kept in ``self.list_data_dict``.
        image_root: Stored unchanged on ``self.image_root``.
            NOTE(review): presumably the directory that the records'
            ``'image'`` values are resolved against -- confirm in
            ``__getitem__``.
        processor_name: Identifier passed to
            ``CLIPImageProcessor.from_pretrained``.

    Raises:
        OSError: If ``data_file_path`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit two-argument form kept on purpose: it does not rely on the
    # implicit ``__class__`` cell, so it works wherever this method is bound.
    super(LLaVAInstructDataset, self).__init__()

    self.tokenizer = tokenizer
    self.image_root = image_root

    # JSON interchange text is UTF-8; do not depend on the locale's default
    # encoding, which can mis-decode or fail on non-ASCII text.
    with open(data_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Entries without an 'image' key are dropped.
    self.list_data_dict = [item for item in data if "image" in item]

    self.processor = CLIPImageProcessor.from_pretrained(processor_name)

    print("Formatting llava instruction data")
| 151 | |
| 152 | def __len__(self): |
| 153 | return len(self.list_data_dict) |
nothing calls this directly
no test coverage detected