MCPcopy
hub / github.com/showlab/Show-o / __init__

Method __init__

llava/llava_instruct_data.py:133–150  ·  view source on GitHub ↗
(self, tokenizer)

Source from the content-addressed store, hash-verified

class LLaVAInstructDataset(Dataset):
    """Dataset over the image-bearing records of a LLaVA instruction JSON file.

    On construction the annotation file is read in full, records without an
    ``'image'`` key are discarded, and a CLIP image processor is loaded.
    """

    def __init__(
        self,
        tokenizer,
        data_file_path="/mnt/bn/vgfm2/test_dit/llava_v1_5_mix665k.json",
        image_root="/mnt/bn/vgfm2/test_dit/tuning_data",
    ):
        """Load the annotation file and keep only records that reference an image.

        Args:
            tokenizer: Stored as ``self.tokenizer``; not used during
                construction.
            data_file_path: Path to the JSON annotation file. The top-level
                JSON value is iterated record by record. Defaults to the
                location that was previously hard-coded.
            image_root: Stored as ``self.image_root``.
                NOTE(review): presumably the directory that each record's
                ``'image'`` value is resolved against -- confirm where the
                samples are actually read.

        Raises:
            OSError: If ``data_file_path`` cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        super().__init__()

        self.tokenizer = tokenizer
        self.image_root = image_root

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(data_file_path, 'r', encoding="utf-8") as f:
            records = json.load(f)

        # Text-only conversations (no 'image' key) are dropped.
        self.list_data_dict = [item for item in records if 'image' in item]

        self.processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")

        print("Formatting llava instruction data")

    def __len__(self):
        """Return the number of image-bearing records kept by ``__init__``."""
        return len(self.list_data_dict)

Callers

nothing calls this directly

Calls 1

from_pretrained — Method · 0.45

Tested by

no test coverage detected