Method `__init__`

llava/llava_instruct_data.py:133–150 · view source on GitHub ↗

(self, tokenizer)

Source from the content-addressed store, hash-verified

131	class LLaVAInstructDataset(Dataset):
132
def __init__(
    self,
    tokenizer,
    *,
    data_file_path="/mnt/bn/vgfm2/test_dit/llava_v1_5_mix665k.json",
    image_root="/mnt/bn/vgfm2/test_dit/tuning_data",
    processor_name="openai/clip-vit-large-patch14-336",
):
    """Load the instruction JSON and keep only the records that have an image.

    Args:
        tokenizer: Stored unchanged on ``self.tokenizer``; it is not used
            inside this constructor.
        data_file_path: Path of the JSON file to load. Defaults to the
            location that was previously hard-coded here.
        image_root: Stored on ``self.image_root``. Defaults to the location
            that was previously hard-coded here.
        processor_name: Identifier passed to
            ``CLIPImageProcessor.from_pretrained``.

    Raises:
        OSError: If ``data_file_path`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    super(LLaVAInstructDataset, self).__init__()

    self.tokenizer = tokenizer
    self.image_root = image_root

    # Decode explicitly as UTF-8 so loading does not depend on the
    # platform's locale encoding.
    with open(data_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Keep only records that carry an "image" key.
    # NOTE(review): assumes the top-level JSON value is a list of dicts — confirm.
    self.list_data_dict = [item for item in data if "image" in item]

    self.processor = CLIPImageProcessor.from_pretrained(processor_name)

    # This message is emitted after the data has already been loaded and filtered.
    print("Formatting llava instruction data")
151
152	def __len__(self):
153	return len(self.list_data_dict)

nothing calls this directly

from_pretrained · Method · 0.45

no test coverage detected