hub / github.com/microsoft/Magma / _construct_conv_som

Method _construct_conv_som

data/conversations.py:134–174 · view source on GitHub ↗

Construct conversations for spatial prediction

(self, item, image, visual_traces, frame_pos, pos_traces_to_mark=None, neg_traces_to_mark=None, normalize=True)

Source from the content-addressed store, hash-verified

132
133
def _construct_conv_som(self, item, image, visual_traces, frame_pos, pos_traces_to_mark=None, neg_traces_to_mark=None, normalize=True):
    """
    Construct conversations for spatial prediction.

    Builds a user prompt asking for the location of every numeric mark on
    the image, and the matching answer listing each mark's position.

    Args:
        item: Not read by this method; kept so the signature stays stable.
        image: Image the marks refer to. ``image.size[0]`` / ``image.size[1]``
            are used as the x / y extents when ``normalize`` is True
            (presumably a PIL image -- TODO confirm against callers).
        visual_traces: Mapping with a ``'pred_tracks'`` entry. Only read when
            the marks have to be sampled here. ``pred_tracks`` is indexed as
            ``[:, frame_pos:, idx]`` and queried with ``.size(2)``, so it is
            assumed to be a torch tensor with at least three dims.
        frame_pos: Start index along the second axis of ``pred_tracks``.
        pos_traces_to_mark: Optional dict of mark id -> traces for positive
            marks. It is NOT modified by this method.
        neg_traces_to_mark: Optional dict of mark id -> traces for negative
            marks. On a mark-id collision the negative entry wins.
        normalize: When True, the first point of each trace is quantized to
            a ``spatial_quant_size`` x ``spatial_quant_size`` grid; otherwise
            the raw coordinates are truncated to ints.

    Returns:
        Tuple ``(conv_user, conv_gpt, image)``. ``image`` is the input image,
        or the one returned by ``som_prompting`` when marks were sampled here.
    """
    if pos_traces_to_mark is None or neg_traces_to_mark is None:
        # If either dict is missing, both are regenerated from random tracks.
        pred_tracks = visual_traces['pred_tracks']
        num_tracks = pred_tracks.size(2)
        # randint's upper bound is exclusive: 2-5 positive, 6-14 negative.
        num_clusters_pos = torch.randint(2, 6, (1,)).item()
        num_clusters_neg = torch.randint(6, 15, (1,)).item()
        # Track indices are drawn independently, so repeats are possible.
        pos_tracks = pred_tracks[:, frame_pos:, torch.randint(0, num_tracks, (num_clusters_pos,))]
        neg_tracks = pred_tracks[:, frame_pos:, torch.randint(0, num_tracks, (num_clusters_neg,))]

        image, pos_traces_to_mark, neg_traces_to_mark, _pos_mark_ids, _neg_mark_ids, _all_idx = \
            som_prompting(image, pos_tracks, neg_tracks, draw_som_positive=True, draw_som_negative=True)

    conv_user = (
        f"{self.image_placeholder}\nThe image is split into {self.spatial_quant_size}x{self.spatial_quant_size} grids, and labeled with numeric marks.\n"
        "Please locate all the numerical marks in the image.\n"
    )

    # Merge into a fresh dict so the caller's positive-mark dict is left
    # untouched (the previous in-place update() leaked negative marks into it).
    all_traces_to_mark = dict(pos_traces_to_mark)
    all_traces_to_mark.update(neg_traces_to_mark)

    marks_pos = []
    # Report marks in ascending mark-id order.
    for key, val in sorted(all_traces_to_mark.items()):
        trace = val[0]
        # Only the first point of the trace is reported.
        if normalize:
            x = int(self.spatial_quant_size * trace[0, 0] / image.size[0])
            y = int(self.spatial_quant_size * trace[0, 1] / image.size[1])
        else:
            x = int(trace[0, 0])
            y = int(trace[0, 1])
        marks_pos.append(f'Mark {key} at [{x},{y}]')

    conv_gpt = ". ".join(marks_pos) + '\n'
    return conv_user, conv_gpt, image
175
176	def _construct_conv_tom(self, item, video_path, visual_traces):
177	"""

Callers 2

_construct_convMethod · 0.95

__call__Method · 0.80

Calls 1

som_promptingFunction · 0.90

Tested by

no test coverage detected