Get HumanEvalPlus locally. Args: err_incomplete (bool, optional): Whether to raise error if HumanEvalPlus is not complete. Defaults to True. mini (bool, optional): Whether to use the mini version of HumanEvalPlus. Defaults to False. Returns: Dict[str, Dict]: Mapping from each task's "task_id" to its task dict.
(
err_incomplete=True, mini=False, noextreme=False, version="default"
)
| 40 | |
| 41 | |
def get_human_eval_plus(
    err_incomplete=True, mini=False, noextreme=False, version="default"
) -> Dict[str, Dict]:
    """Load the local HumanEvalPlus dataset, indexed by task id.

    Args:
        err_incomplete (bool, optional): When true, run ``completeness_check``
            on the loaded tasks so that an incomplete HumanEvalPlus is
            reported as an error. Defaults to True.
        mini (bool, optional): Whether to use the mini version of
            HumanEvalPlus. Defaults to False.
        noextreme (bool, optional): Forwarded unchanged to
            ``_ready_human_eval_plus_path`` when resolving the dataset file.
            Defaults to False.
        version (str, optional): Forwarded unchanged to
            ``_ready_human_eval_plus_path`` when resolving the dataset file.
            Defaults to "default".

    Returns:
        Dict[str, Dict]: Mapping from each task's "task_id" to the full task
        dict read from the dataset file.

    Notes:
        Each task dict is documented to carry these keys:
        "task_id" is the identifier string for the task
        "prompt" is the function signature with docstring
        "contract" is the assertions for the function's input (validity)
        "canonical_solution" is the ground-truth implementation for diff-testing
        "base_input" is the test inputs from original HumanEval
        "plus_input" is the test inputs brought by EvalPlus
        "atol" is the absolute tolerance for diff-testing
    """
    dataset_path = _ready_human_eval_plus_path(
        mini=mini, noextreme=noextreme, version=version
    )

    # Index every streamed record by its task id; a repeated id keeps the
    # record that appears last in the file.
    tasks_by_id = {}
    for record in stream_jsonl(dataset_path):
        tasks_by_id[record["task_id"]] = record

    # Callers may opt out of the completeness validation.
    if not err_incomplete:
        return tasks_by_id

    completeness_check("HumanEval+", tasks_by_id)
    return tasks_by_id
| 67 | |
| 68 | |
| 69 | def get_human_eval() -> Dict[str, Dict]: |