Test solutions, return functionally correct solutions
(
dataset: str, solutions: List[str], task: Dict, expected_output: List
)
| 126 | |
| 127 | |
def test_solutions(
    dataset: str, solutions: List[str], task: Dict, expected_output: List
) -> List[str]:
    """Test solutions, return functionally correct solutions.

    Every candidate is submitted to ``correctness_check`` in a process pool
    sized to half of the available CPUs (at least one worker).

    Args:
        dataset: Dataset identifier, forwarded unchanged to
            ``correctness_check``.
        solutions: Candidate solution sources to evaluate.
        task: Task description, forwarded unchanged to ``correctness_check``.
        expected_output: Reference outputs, forwarded unchanged to
            ``correctness_check``.

    Returns:
        The solutions whose check result starts with ``PASS``, in the same
        relative order as they appear in ``solutions``.

    Raises:
        Any exception raised inside a worker is re-raised here by
        ``future.result()``.
    """
    n_workers = max(1, multiprocessing.cpu_count() // 2)
    passing_ids = []

    with ProcessPoolExecutor(max_workers=n_workers) as executor:
        futures = [
            executor.submit(
                correctness_check, index, solution, dataset, task, expected_output
            )
            for index, solution in enumerate(solutions)
        ]
        for future in as_completed(futures):
            # The worker echoes the submitted index back, so results can be
            # matched to their solution regardless of completion order.
            index, result, _ = future.result()
            if result[0] == PASS:
                passing_ids.append(index)

    # as_completed yields futures in completion order, which varies between
    # runs; sorting restores the input order and makes the output
    # deterministic.
    return [solutions[i] for i in sorted(passing_ids)]
| 148 | |
| 149 | |
| 150 | def script(sample_dir: str, dataset: str = "humaneval", debug_task: str = None): |