Estimates pass@k of each problem and returns them in an array.
(
num_samples: Union[int, List[int], np.ndarray],
num_correct: Union[List[int], np.ndarray],
k: int,
)
| 57 | |
| 58 | # unbiased estimator from https://github.com/openai/human-eval |
def estimate_pass_at_k(
    num_samples: Union[int, List[int], np.ndarray],
    num_correct: Union[List[int], np.ndarray],
    k: int,
) -> np.ndarray:
    """
    Estimates pass@k of each problem and returns them in an array.

    Args:
        num_samples: Number of samples drawn per problem. Either a single
            integer (Python ``int`` or NumPy integer scalar) applied to
            every problem, or a sequence with one entry per problem.
        num_correct: Number of correct samples for each problem.
        k: The ``k`` in pass@k.

    Returns:
        A 1-D array with one pass@k estimate per entry of ``num_correct``.

    Raises:
        AssertionError: If ``num_samples`` is a sequence whose length
            differs from ``len(num_correct)``.
    """

    def estimator(n: int, c: int, k: int) -> float:
        """
        Calculates 1 - comb(n - c, k) / comb(n, k).
        """
        # Fewer than k incorrect samples: every size-k subset must contain
        # at least one correct sample.
        if n - c < k:
            return 1.0
        # Product form of comb(n - c, k) / comb(n, k); avoids evaluating
        # the (potentially huge) binomial coefficients directly.
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    # NumPy integer scalars (e.g. np.int64 obtained by indexing an array)
    # are not instances of `int`, so check for both before falling back to
    # the per-problem sequence path.
    if isinstance(num_samples, (int, np.integer)):
        num_samples_it = itertools.repeat(int(num_samples), len(num_correct))
    else:
        assert len(num_samples) == len(num_correct), (
            "num_samples and num_correct must have the same length"
        )
        num_samples_it = iter(num_samples)

    return np.array(
        [estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]
    )
| 85 | |
| 86 | |
| 87 | PASS = "pass" |
no test coverage detected