(data_path, single_step_data_path, unexpected_img_path, out_path, factor=0.5, train_ratio=0.9, e2e=False, do_copy=True, use_qwen3=False)
| 492 | return normal_entries, no_history_entries, terminate_entries |
| 493 | |
| 494 | def construct_ds(data_path, single_step_data_path, unexpected_img_path, out_path, factor=0.5, train_ratio=0.9, e2e=False, do_copy=True, use_qwen3=False): |
| 495 | os.makedirs(out_path, exist_ok=True) |
| 496 | |
| 497 | e2e_entries_train = [] |
| 498 | e2e_terminate_entries_train = [] |
| 499 | e2e_no_history_entries_train = [] |
| 500 | |
| 501 | e2e_entries_val = [] |
| 502 | e2e_terminate_entries_val = [] |
| 503 | e2e_no_history_entries_val = [] |
| 504 | |
| 505 | # Training set |
| 506 | decider_entries_train = [] |
| 507 | terminate_entries_train = [] |
| 508 | decider_no_history_entries_train = [] |
| 509 | grounder_entries_train = [] |
| 510 | |
| 511 | # Validation set |
| 512 | decider_entries_val = [] |
| 513 | terminate_entries_val = [] |
| 514 | decider_no_history_entries_val = [] |
| 515 | grounder_entries_val = [] |
| 516 | |
| 517 | augment_config_path = os.path.join(os.path.dirname(__file__), 'augment_config.json') |
| 518 | rules = load_augmentation_rules(augment_config_path) |
| 519 | |
| 520 | if os.path.exists(unexpected_img_path): |
| 521 | unexpected_img_dir = os.path.abspath(unexpected_img_path) |
| 522 | unexpected_img_paths = os.listdir(unexpected_img_dir) |
| 523 | unexpected_img_paths = [os.path.join(unexpected_img_dir, img) for img in unexpected_img_paths] |
| 524 | |
| 525 | unexpected_img_safe_abspaths = [] |
| 526 | for unexpected_img_path in unexpected_img_paths: |
| 527 | out_abspath, width, height = resize_and_copy_image("unexpected", unexpected_img_path, unexpected_img_dir, out_path, factor, do_copy=True) |
| 528 | unexpected_img_safe_abspaths.append(out_abspath) |
| 529 | else: |
| 530 | unexpected_img_safe_abspaths = [] |
| 531 | |
| 532 | for root, dirs, files in tqdm(os.walk(data_path), desc="constructing dataset"): |
| 533 | if len(files) == 0: |
| 534 | continue |
| 535 | if "actions.json" not in files or "react.json" not in files or "parse.error" in files: |
| 536 | continue |
| 537 | |
| 538 | actions_json = os.path.join(root, "actions.json") |
| 539 | with open(actions_json, 'r', encoding='utf-8') as file: |
| 540 | try: |
| 541 | data = json.load(file) |
| 542 | except json.JSONDecodeError as e: |
| 543 | print(f"Error decoding JSON in {root}.") |
| 544 | raise e |
| 545 | task_description = data.get("task_description") |
| 546 | actions = data.get("actions") |
| 547 | react_json = os.path.join(root, "react.json") |
| 548 | with open(react_json, "r", encoding="UTF-8") as f: |
| 549 | try: |
| 550 | react_data = json.load(f) |
| 551 | except json.JSONDecodeError as e: |
no test coverage detected