MCP · copy
hub / github.com/IPADS-SAI/MobiAgent / construct_ds

Function construct_ds

collect/construct_sft.py:494–730  ·  view source on GitHub ↗
(data_path, single_step_data_path, unexpected_img_path, out_path, factor=0.5, train_ratio=0.9, e2e=False, do_copy=True, use_qwen3=False)

Source from the content-addressed store, hash-verified

492 return normal_entries, no_history_entries, terminate_entries
493
494def construct_ds(data_path, single_step_data_path, unexpected_img_path, out_path, factor=0.5, train_ratio=0.9, e2e=False, do_copy=True, use_qwen3=False):
495 os.makedirs(out_path, exist_ok=True)
496
497 e2e_entries_train = []
498 e2e_terminate_entries_train = []
499 e2e_no_history_entries_train = []
500
501 e2e_entries_val = []
502 e2e_terminate_entries_val = []
503 e2e_no_history_entries_val = []
504
505 # 训练集
506 decider_entries_train = []
507 terminate_entries_train = []
508 decider_no_history_entries_train = []
509 grounder_entries_train = []
510
511 # 验证集
512 decider_entries_val = []
513 terminate_entries_val = []
514 decider_no_history_entries_val = []
515 grounder_entries_val = []
516
517 augment_config_path = os.path.join(os.path.dirname(__file__), 'augment_config.json')
518 rules = load_augmentation_rules(augment_config_path)
519
520 if os.path.exists(unexpected_img_path):
521 unexpected_img_dir = os.path.abspath(unexpected_img_path)
522 unexpected_img_paths = os.listdir(unexpected_img_dir)
523 unexpected_img_paths = [os.path.join(unexpected_img_dir, img) for img in unexpected_img_paths]
524
525 unexpected_img_safe_abspaths = []
526 for unexpected_img_path in unexpected_img_paths:
527 out_abspath, width, height = resize_and_copy_image("unexpected", unexpected_img_path, unexpected_img_dir, out_path, factor, do_copy=True)
528 unexpected_img_safe_abspaths.append(out_abspath)
529 else:
530 unexpected_img_safe_abspaths = []
531
532 for root, dirs, files in tqdm(os.walk(data_path), desc="constructing dataset"):
533 if len(files) == 0:
534 continue
535 if "actions.json" not in files or "react.json" not in files or "parse.error" in files:
536 continue
537
538 actions_json = os.path.join(root, "actions.json")
539 with open(actions_json, 'r', encoding='utf-8') as file:
540 try:
541 data = json.load(file)
542 except json.JSONDecodeError as e:
543 print(f"Error decoding JSON in {root}.")
544 raise e
545 task_description = data.get("task_description")
546 actions = data.get("actions")
547 react_json = os.path.join(root, "react.json")
548 with open(react_json, "r", encoding="UTF-8") as f:
549 try:
550 react_data = json.load(f)
551 except json.JSONDecodeError as e:

Callers 1

construct_sft.py · File · 0.85

Tested by

no test coverage detected