(path, overwrite=False)
| 11 | |
| 12 | |
def main(path, overwrite=False):
    """Upload an EvalPerf JSONL file to the Hub dataset repo ``REPO_ID``.

    The file name must have the form ``evalperf-[date].jsonl``; the part after
    the dash is used as the tag created on the repo after the upload. Records
    whose ``task_id`` is absent from the merged HumanEval+/MBPP+ datasets are
    dropped, ``pe_input`` is serialized to a JSON string, and ``entry_point``
    and ``prompt`` are copied in from the EvalPlus datasets.

    Args:
        path: Path to the ``evalperf-[date].jsonl`` file to upload.
        overwrite: If True, an existing tag with the same name is deleted and
            re-created after the upload. If False and the tag already exists,
            nothing is uploaded.

    Raises:
        ValueError: If ``path`` is not named ``evalperf-[date].jsonl``, or if
            the tag already exists and ``overwrite`` is False.
    """
    # Explicit raises instead of asserts: asserts are stripped under ``-O``,
    # which would let a misnamed file be uploaded under a bogus tag.
    if not path.endswith(".jsonl"):
        raise ValueError(f"{path} is not a jsonl file")
    name = os.path.basename(path).split(".")[0]
    parts = name.split("-")
    if len(parts) != 2 or parts[0] != "evalperf":
        raise ValueError(f"Expected fmt evalperf-[date].jsonl; but got {path}")
    version = parts[1]

    evalplus_datasets = {**get_human_eval_plus(), **get_mbpp_plus()}

    with open(path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing newline) so json.loads doesn't fail.
        data = [json.loads(line) for line in f if line.strip()]

    # drop task ids that are removed in latest EvalPlus dataset
    to_drop = [
        task["task_id"] for task in data if task["task_id"] not in evalplus_datasets
    ]
    print(f"Removing {len(to_drop)} tasks that are not in the latest EvalPlus dataset")
    print(to_drop)
    # Membership is tested against the dict directly (O(1) per record) rather
    # than against the ``to_drop`` list; the kept records are the same.
    data = [d for d in data if d["task_id"] in evalplus_datasets]

    # convert pe_input into string
    for d in data:
        task = evalplus_datasets[d["task_id"]]
        d["pe_input"] = json.dumps(d["pe_input"])
        d["entry_point"] = task["entry_point"]
        d["prompt"] = task["prompt"]

    # combine
    dataset = DatasetDict(
        {
            "test": Dataset.from_list(data, split="test"),
            "demo": Dataset.from_list(data[:2], split="demo"),
        }
    )
    print(dataset)

    repo = list_repo_refs(REPO_ID, repo_type="dataset")
    tags = [tag.name for tag in repo.tags]
    print(REPO_ID, "has tags:", tags)

    # Refuse up front: otherwise the data would be pushed to main and only
    # then would tag creation fail, leaving a partially completed upload.
    tag_exists = version in tags
    if tag_exists and not overwrite:
        raise ValueError(
            f"Tag {version} already exists in {REPO_ID}; "
            "pass overwrite=True to replace it"
        )

    print(f"Uploading dataset with tag {version} to Hub... Please enter to confirm:")
    input()

    dataset.push_to_hub(REPO_ID, branch="main")

    # Replace the old tag only after the push succeeded, so a failed upload
    # does not leave the repo without the previously published tag.
    if tag_exists:
        print(f"Tag {version} already exists, overwriting...")
        delete_tag(REPO_ID, repo_type="dataset", tag=version)
    create_tag(REPO_ID, repo_type="dataset", tag=version)
| 60 | |
| 61 | |
| 62 | if __name__ == "__main__": |
nothing calls this directly
no test coverage detected