(single_step_data_path, out_path, factor=0.5, train_ratio=0.9, do_copy=True, use_qwen3=False)
| 197 | return [int(rel_x1), int(rel_y1), int(rel_x2), int(rel_y2)] |
| 198 | |
| 199 | def construct_ss_data(single_step_data_path, out_path, factor=0.5, train_ratio=0.9, do_copy=True, use_qwen3=False): |
| 200 | if not os.path.exists(single_step_data_path): |
| 201 | return [], [], [], [] |
| 202 | |
| 203 | augment_config_path = os.path.join(os.path.dirname(__file__), 'augment_config.json') |
| 204 | rules = load_augmentation_rules(augment_config_path) |
| 205 | |
| 206 | # Initialize all return variables |
| 207 | decider_ss_entry_train = [] |
| 208 | decider_ss_entry_val = [] |
| 209 | grounder_ss_entry_train = [] |
| 210 | grounder_ss_entry_val = [] |
| 211 | |
| 212 | decider_ss_path = os.path.join(single_step_data_path, "decider") |
| 213 | if os.path.exists(decider_ss_path): |
| 214 | for root, dirs, files in tqdm(os.walk(decider_ss_path), desc="constructing single step decider dataset"): |
| 215 | if len(files) == 0: |
| 216 | continue |
| 217 | if "react.json" not in files: |
| 218 | continue |
| 219 | if "tasks.json" not in files: |
| 220 | continue |
| 221 | |
| 222 | react_path = os.path.join(root, "react.json") |
| 223 | with open(react_path, "r", encoding="UTF-8") as f: |
| 224 | react_data = json.load(f) |
| 225 | |
| 226 | tasks_path = os.path.join(root, "tasks.json") |
| 227 | with open(tasks_path, "r", encoding="UTF-8") as f: |
| 228 | tasks = json.load(f) |
| 229 | |
| 230 | for i, react in enumerate(react_data, 1): |
| 231 | is_train = random.random() < train_ratio |
| 232 | |
| 233 | augment_rule = augment_data(react, rules) |
| 234 | |
| 235 | img_path = os.path.join(root, f"{i}.jpg") |
| 236 | out_abspath, width, height = resize_and_copy_image("ss", img_path, single_step_data_path, out_path, factor, do_copy=do_copy) |
| 237 | |
| 238 | reasoning = react["reasoning"] |
| 239 | action_type = react["function"]["name"] |
| 240 | param = react["function"]["parameters"] |
| 241 | |
| 242 | action_type, param = validate_action(action_type, param) |
| 243 | |
| 244 | random_tasks = random.sample(tasks, 1) |
| 245 | |
| 246 | for task in random_tasks: |
| 247 | output_dict = dict(reasoning=reasoning, action=action_type, parameters=param) |
| 248 | if use_qwen3: |
| 249 | output, _ = format_qwen3_decider_output(output_dict) |
| 250 | instruction = decider_prompt_qwen3_no_history.format(task=task) |
| 251 | else: |
| 252 | output = json.dumps(output_dict, ensure_ascii=False) |
| 253 | instruction = decider_prompt_no_history.format(task=task) |
| 254 | |
| 255 | aug_num_repeat = augment_num_repeat("decider_no_history", augment_rule, is_train) |
| 256 | entries = create_entries_for_one_step( |
no test coverage detected