MCPcopy
hub / github.com/evalplus/evalplus / main

Function main

tools/evalperf/hf_upload.py:13–59  ·  view source on GitHub ↗
(path, overwrite=False)

Source from the content-addressed store, hash-verified

11
12
def main(path, overwrite=False):
    """Upload an EvalPerf ``.jsonl`` dump to the Hub dataset repo and tag it.

    The file name must look like ``evalperf-<version>.jsonl``; ``<version>``
    becomes the tag created on ``REPO_ID`` after the push.

    Args:
        path: Path to the ``.jsonl`` file; one JSON object per line, each
            carrying at least ``task_id`` and ``pe_input``.
        overwrite: When True, an existing tag named ``<version>`` is deleted
            and re-created on the newly pushed data.

    Raises:
        AssertionError: If ``path`` does not end with ``.jsonl`` or the file
            name does not start with ``evalperf``.
        ValueError: If the file name does not contain exactly one ``-``, or
            if the tag already exists and ``overwrite`` is False.
    """
    assert path.endswith(".jsonl"), f"{path} is not a jsonl file"
    name = os.path.basename(path).split(".")[0]
    first, version = name.split("-")
    assert first == "evalperf", f"Expected fmt evalperf-[date].jsonl; but got {path}"

    evalplus_datasets = {**get_human_eval_plus(), **get_mbpp_plus()}

    # Skip blank lines (e.g. a trailing newline at EOF) instead of crashing
    # in json.loads; read as UTF-8 regardless of the platform default.
    with open(path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]

    # drop task ids that are removed in latest EvalPlus dataset
    to_drop = [
        task["task_id"] for task in data if task["task_id"] not in evalplus_datasets
    ]
    print(f"Removing {len(to_drop)} tasks that are not in the latest EvalPlus dataset")
    print(to_drop)
    dropped_ids = set(to_drop)  # O(1) membership test while filtering
    data = [d for d in data if d["task_id"] not in dropped_ids]

    # convert pe_input into string and attach entry point / prompt taken
    # from the EvalPlus task with the same id
    for d in data:
        task = evalplus_datasets[d["task_id"]]
        d["pe_input"] = json.dumps(d["pe_input"])
        d["entry_point"] = task["entry_point"]
        d["prompt"] = task["prompt"]

    # combine: "test" holds every record, "demo" only the first two
    dataset = DatasetDict(
        {
            "test": Dataset.from_list(data, split="test"),
            "demo": Dataset.from_list(data[:2], split="demo"),
        }
    )
    print(dataset)

    repo = list_repo_refs(REPO_ID, repo_type="dataset")
    tags = [tag.name for tag in repo.tags]
    print(REPO_ID, "has tags:", tags)

    # Refuse before anything is uploaded: pushing first and only then failing
    # inside create_tag would update the branch while the tag still points at
    # the old data.
    tag_exists = version in tags
    if tag_exists and not overwrite:
        raise ValueError(
            f"Tag {version} already exists in {REPO_ID}; "
            "pass overwrite=True to replace it"
        )

    print(f"Uploading dataset with tag {version} to Hub... Please enter to confirm:")
    input()

    if tag_exists:
        print(f"Tag {version} already exists, overwriting...")
        delete_tag(REPO_ID, repo_type="dataset", tag=version)

    # NOTE(review): newer `datasets` releases appear to use `revision` instead
    # of `branch` for push_to_hub — confirm against the pinned version.
    dataset.push_to_hub(REPO_ID, branch="main")
    create_tag(REPO_ID, repo_type="dataset", tag=version)
60
61
62if __name__ == "__main__":

Callers

nothing calls this directly

Calls 2

get_human_eval_plusFunction · 0.90
get_mbpp_plusFunction · 0.90

Tested by

no test coverage detected