hub / github.com/IPADS-SAI/MobiAgent / construct_ss_data

Function construct_ss_data

collect/construct_sft.py:199–315 · view source on GitHub ↗

(single_step_data_path, out_path, factor=0.5, train_ratio=0.9, do_copy=True, use_qwen3=False)

Source from the content-addressed store, hash-verified

197	return [int(rel_x1), int(rel_y1), int(rel_x2), int(rel_y2)]
198
199	def construct_ss_data(single_step_data_path, out_path, factor=0.5, train_ratio=0.9, do_copy=True, use_qwen3=False):
200	if not os.path.exists(single_step_data_path):
201	return [], [], [], []
202
203	augment_config_path = os.path.join(os.path.dirname(__file__), 'augment_config.json')
204	rules = load_augmentation_rules(augment_config_path)
205
206	# 初始化所有返回变量
207	decider_ss_entry_train = []
208	decider_ss_entry_val = []
209	grounder_ss_entry_train = []
210	grounder_ss_entry_val = []
211
212	decider_ss_path = os.path.join(single_step_data_path, "decider")
213	if os.path.exists(decider_ss_path):
214	for root, dirs, files in tqdm(os.walk(decider_ss_path), desc="constructing single step decider dataset"):
215	if len(files) == 0:
216	continue
217	if "react.json" not in files:
218	continue
219	if "tasks.json" not in files:
220	continue
221
222	react_path = os.path.join(root, "react.json")
223	with open(react_path, "r", encoding="UTF-8") as f:
224	react_data = json.load(f)
225
226	tasks_path = os.path.join(root, "tasks.json")
227	with open(tasks_path, "r", encoding="UTF-8") as f:
228	tasks = json.load(f)
229
230	for i, react in enumerate(react_data, 1):
231	is_train = random.random() < train_ratio
232
233	augment_rule = augment_data(react, rules)
234
235	img_path = os.path.join(root, f"{i}.jpg")
236	out_abspath, width, height = resize_and_copy_image("ss", img_path, single_step_data_path, out_path, factor, do_copy=do_copy)
237
238	reasoning = react["reasoning"]
239	action_type = react["function"]["name"]
240	param = react["function"]["parameters"]
241
242	action_type, param = validate_action(action_type, param)
243
244	random_tasks = random.sample(tasks, 1)
245
246	for task in random_tasks:
247	output_dict = dict(reasoning=reasoning, action=action_type, parameters=param)
248	if use_qwen3:
249	output, _ = format_qwen3_decider_output(output_dict)
250	instruction = decider_prompt_qwen3_no_history.format(task=task)
251	else:
252	output = json.dumps(output_dict, ensure_ascii=False)
253	instruction = decider_prompt_no_history.format(task=task)
254
255	aug_num_repeat = augment_num_repeat("decider_no_history", augment_rule, is_train)
256	entries = create_entries_for_one_step(

Callers 1

construct_ds · Function · 0.85

Calls 10

load_augmentation_rules · Function · 0.85

augment_data · Function · 0.85

resize_and_copy_image · Function · 0.85

validate_action · Function · 0.85

format_qwen3_decider_output · Function · 0.85

augment_num_repeat · Function · 0.85

create_entries_for_one_step · Function · 0.85

relative_bbox · Function · 0.85

format_qwen3_grounder_output · Function · 0.85

format · Method · 0.80

Tested by

no test coverage detected