(directory, sanitize_code: bool = True)
| 10 | |
| 11 | |
def main(directory, sanitize_code: bool = True):
    """Collect the ``.py`` samples under *directory* into one JSONL file.

    Every immediate subdirectory of *directory* must be named
    ``<dataset>_<task number>`` and is mapped to the task id
    ``<dataset>/<task number>``. Each ``.py`` file found in such a
    subdirectory becomes one ``{"task_id": ..., "solution": ...}`` line in
    ``<directory>.jsonl``, which is written next to *directory*.

    Args:
        directory: Folder holding one subdirectory per task.
        sanitize_code: When true, every sample is passed through
            ``sanitize`` with the task's entry point. Samples for which
            ``sanitize`` raises ``ValueError`` are reported on stdout and
            left out of the output.

    Raises:
        AssertionError: If a subdirectory name contains no underscore.
        ValueError: If a subdirectory name contains more than one
            underscore (the two-way unpacking of the split fails).
        KeyError: If a derived task id is not present in the loaded
            datasets.
    """
    # Normalise before splitting: with a trailing separator ("foo/") the
    # basename would be empty and the output would land at "foo/.jsonl"
    # inside the input folder instead of "foo.jsonl" beside it.
    normalized_dir = os.path.normpath(directory)
    basename = os.path.basename(normalized_dir)
    parent_dir = os.path.dirname(normalized_dir)
    target_jsonl_path = os.path.join(parent_dir, f"{basename}.jsonl")

    # Single lookup table over both benchmarks; each entry is indexed with
    # "entry_point" below.
    datasets = {**get_human_eval_plus(), **get_mbpp_plus()}

    with open(target_jsonl_path, "w") as f:
        # One subdirectory per task; stray regular files are ignored.
        for subdir_name in tqdm(os.listdir(directory)):
            subdir_path = os.path.join(directory, subdir_name)
            if not os.path.isdir(subdir_path):
                continue

            assert "_" in subdir_name
            dataset_name, task_num = subdir_name.split("_")
            task_id = f"{dataset_name}/{task_num}"
            entrypoint = datasets[task_id]["entry_point"]

            for sample_name in os.listdir(subdir_path):
                if not sample_name.endswith(".py"):
                    continue

                sample_path = os.path.join(subdir_path, sample_name)
                # Close each sample promptly rather than leaking one file
                # handle per solution until garbage collection.
                with open(sample_path) as sample_file:
                    code = sample_file.read()

                if sanitize_code:
                    try:
                        code = sanitize(code, entrypoint=entrypoint)
                    except ValueError as e:
                        # Best effort: show the offending sample and move on.
                        print(f"Failed to sanitize {task_id}/{sample_name}: {e}")
                        print(code)
                        continue

                f.write(json.dumps({"task_id": task_id, "solution": code}) + "\n")
| 43 | |
| 44 | |
| 45 | if __name__ == "__main__": |
nothing calls this directly
no test coverage detected