MCPcopy
hub / github.com/microsoft/JARVIS / main

Function main

easytool/data_process.py:54–105  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

52
53
def _extract_flat(archive_path, members, dest_dir):
    """Copy selected zip members straight into ``dest_dir``.

    Each member is written as ``dest_dir/<basename of member>``; the folder
    structure stored in the archive is not recreated, so nothing is ever
    unpacked into (or later deleted from) the current working directory.

    Args:
        archive_path: Path of the zip archive to read.
        members: Iterable of member names inside the archive.
        dest_dir: Existing directory that receives the extracted files.

    Raises:
        KeyError: If a requested member is not present in the archive.
    """
    # The context manager guarantees the archive handle is closed.
    with ZipFile(archive_path, 'r') as zf:
        for member in members:
            target = os.path.join(dest_dir, os.path.basename(member))
            with zf.open(member) as src, open(target, "wb") as dst:
                shutil.copyfileobj(src, dst)


def main():
    """Download the FuncQA, RestBench and ToolBench test data.

    For each dataset a ``data_<dataset>/test_data`` directory is created next
    to this script and filled from the location stored in ``urls[dataset]``
    (``urls`` is defined elsewhere in this module - presumably a mapping from
    dataset name to download URL). The ToolBench files are additionally
    passed to ``toolbench_process``.
    """
    # Local import keeps this function self-contained; it is only needed for
    # the wget invocation below.
    import subprocess

    curr_dir = os.path.dirname(__file__)

    for dataset in [
        "funcqa",
        "restbench",
        "toolbench"
    ]:
        dataset_path = os.path.join(curr_dir, "data_{}".format(dataset), "test_data")

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(dataset_path, exist_ok=True)

        if dataset == "funcqa":
            print("Processing FuncQA dataset ...\n")
            temp_file = os.path.join(dataset_path, "data_toolkengpt_0918.zip")
            gdown.download(urls[dataset], temp_file, quiet=False)
            try:
                _extract_flat(
                    temp_file,
                    [
                        "data/funcqa/funcqa_oh.json",
                        "data/funcqa/funcqa_mh.json",
                    ],
                    dataset_path,
                )
            finally:
                # Always drop the downloaded archive, even if extraction failed.
                if os.path.exists(temp_file):
                    os.remove(temp_file)

            print("FuncQA dataset Done!\n")

        if dataset == "restbench":
            print("Processing RestBench dataset ... \n")
            try:
                # Argument list (no shell) so paths containing spaces or shell
                # metacharacters are passed to wget unchanged.
                subprocess.run(
                    ["wget", "-P", dataset_path, "-c", urls[dataset]],
                    check=True,
                )
            except (OSError, subprocess.CalledProcessError) as err:
                # Keep going with the remaining datasets, but do not claim success.
                print("RestBench download failed: {}\n".format(err))
            else:
                print("RestBench dataset Done!\n")

        if dataset == "toolbench":
            print("Processing ToolBench dataset ... \n")
            temp_file = os.path.join(dataset_path, "data.zip")
            gdown.download(urls[dataset], temp_file, quiet=False)
            try:
                _extract_flat(
                    temp_file,
                    [
                        "data/test_instruction/G2_category.json",
                        "data/test_instruction/G3_instruction.json",
                    ],
                    dataset_path,
                )
                toolbench_process("{}/G2_category.json".format(dataset_path), "data_{}".format(dataset))
                toolbench_process("{}/G3_instruction.json".format(dataset_path), "data_{}".format(dataset))
            finally:
                # Always drop the downloaded archive, even if a step above failed.
                if os.path.exists(temp_file):
                    os.remove(temp_file)

            print("Toolbench dataset Done!\n")
106
107
108if __name__ == '__main__':

Callers 1

data_process.py · File · 0.70

Calls 1

toolbench_process · Function · 0.85

Tested by

no test coverage detected