We accept two input formats: (1) `sample.jsonl`, which is the format from HumanEval, i.e., {task_id, completion or solution}; (2) a folder which contains sub-folders named after the task_id, where each sub-folder contains samples named `[?].py` and `?` is the solution id, starting from 0.
(sample_path: PathLike)
| 89 | |
| 90 | |
def load_solutions(sample_path: PathLike) -> Iterable[Dict]:
    """Lazily yield solution records from a JSONL file or a folder of samples.

    We accept two formats of inputs.
    + `sample.jsonl` which is the format from HumanEval, i.e.,
      {task_id, completion or solution}.
    + A folder which contains sub-folders named after the task_id. Each sub-folder
      contains samples named in `[?].py` where `?` is the solution id starting with 0.
      Different from `sample.jsonl`, the solutions must be complete (with prompt prefix).

    This is a generator: nothing is read or validated until it is iterated.

    Args:
        sample_path: Path to a JSONL file, or to a folder of per-task sub-folders.
            Anything that is not an existing regular file is treated as a folder.

    Yields:
        One dict per solution.
        From a file: each sample produced by `stream_jsonl`, with an added
        `_identifier` of the form "<task_id> (line N in <sample_path>)".
        From a folder: a dict with `_identifier` and `_path` (both the path of
        the solution file), `task_id` (the sub-folder name with every "_"
        replaced by "/") and `solution` (the text of the file). Sub-folders and
        solution files are visited in sorted name order.

    Raises:
        AssertionError: If a JSONL sample has neither "completion" nor
            "solution", or if either of them is present but not a string.
    """

    # if it is a file
    if os.path.isfile(sample_path):
        for i, sample in enumerate(stream_jsonl(sample_path)):
            assert (
                "completion" in sample or "solution" in sample
            ), "No completion or solution found in sample!"
            assert "solution" not in sample or isinstance(
                sample["solution"], str
            ), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
            assert "completion" not in sample or isinstance(
                sample["completion"], str
            ), "Completion must be a string! If you have multiple solutions, please repeat the task_id."

            # Line numbers in the identifier are 1-based for human readers.
            sample["_identifier"] = (
                sample["task_id"] + f" (line {i+1} in {sample_path})"
            )
            yield sample
        return

    # if it is a folder
    # os.listdir returns entries in arbitrary order; sort so that the sequence
    # of yielded solutions is reproducible across runs and filesystems.
    for task_id in sorted(os.listdir(sample_path)):
        task_path = os.path.join(sample_path, task_id)
        if not os.path.isdir(task_path):
            continue

        for solution_id in sorted(os.listdir(task_path)):
            solution_path = os.path.join(task_path, solution_id)
            if not (os.path.isfile(solution_path) and solution_path.endswith(".py")):
                continue

            # Read as UTF-8 explicitly: the locale default encoding may not be
            # able to decode non-ASCII characters in a solution.
            with open(solution_path, "r", encoding="utf-8") as f:
                completion = f.read()
            yield {
                "_identifier": solution_path,
                "_path": solution_path,
                "task_id": task_id.replace("_", "/"),
                "solution": completion,
            }
| 134 | |
| 135 | |
| 136 | def write_directory(directory: PathLike, data: Iterable[Dict]): |