MCPcopy
hub / github.com/IPADS-SAI/MobiAgent / construct_ss_data

Function construct_ss_data

collect/construct_sft.py:199–315  ·  view source on GitHub ↗
(single_step_data_path, out_path, factor=0.5, train_ratio=0.9, do_copy=True, use_qwen3=False)

Source from the content-addressed store, hash-verified

197 return [int(rel_x1), int(rel_y1), int(rel_x2), int(rel_y2)]
198
199def construct_ss_data(single_step_data_path, out_path, factor=0.5, train_ratio=0.9, do_copy=True, use_qwen3=False):
200 if not os.path.exists(single_step_data_path):
201 return [], [], [], []
202
203 augment_config_path = os.path.join(os.path.dirname(__file__), 'augment_config.json')
204 rules = load_augmentation_rules(augment_config_path)
205
206 # 初始化所有返回变量
207 decider_ss_entry_train = []
208 decider_ss_entry_val = []
209 grounder_ss_entry_train = []
210 grounder_ss_entry_val = []
211
212 decider_ss_path = os.path.join(single_step_data_path, "decider")
213 if os.path.exists(decider_ss_path):
214 for root, dirs, files in tqdm(os.walk(decider_ss_path), desc="constructing single step decider dataset"):
215 if len(files) == 0:
216 continue
217 if "react.json" not in files:
218 continue
219 if "tasks.json" not in files:
220 continue
221
222 react_path = os.path.join(root, "react.json")
223 with open(react_path, "r", encoding="UTF-8") as f:
224 react_data = json.load(f)
225
226 tasks_path = os.path.join(root, "tasks.json")
227 with open(tasks_path, "r", encoding="UTF-8") as f:
228 tasks = json.load(f)
229
230 for i, react in enumerate(react_data, 1):
231 is_train = random.random() < train_ratio
232
233 augment_rule = augment_data(react, rules)
234
235 img_path = os.path.join(root, f"{i}.jpg")
236 out_abspath, width, height = resize_and_copy_image("ss", img_path, single_step_data_path, out_path, factor, do_copy=do_copy)
237
238 reasoning = react["reasoning"]
239 action_type = react["function"]["name"]
240 param = react["function"]["parameters"]
241
242 action_type, param = validate_action(action_type, param)
243
244 random_tasks = random.sample(tasks, 1)
245
246 for task in random_tasks:
247 output_dict = dict(reasoning=reasoning, action=action_type, parameters=param)
248 if use_qwen3:
249 output, _ = format_qwen3_decider_output(output_dict)
250 instruction = decider_prompt_qwen3_no_history.format(task=task)
251 else:
252 output = json.dumps(output_dict, ensure_ascii=False)
253 instruction = decider_prompt_no_history.format(task=task)
254
255 aug_num_repeat = augment_num_repeat("decider_no_history", augment_rule, is_train)
256 entries = create_entries_for_one_step(

Callers 1

construct_ds — Function · 0.85

Calls 10

load_augmentation_rules — Function · 0.85
augment_data — Function · 0.85
resize_and_copy_image — Function · 0.85
validate_action — Function · 0.85
augment_num_repeat — Function · 0.85
relative_bbox — Function · 0.85
format — Method · 0.80

Tested by

no test coverage detected