(
dataset: str,
completion_id: int,
problem: Dict[str, Any],
solution: str,
expected_output: Dict[str, List],
base_only=False,
fast_check=False,
identifier=None,
min_time_limit: float = DEFAULT_MIN_TIME_LIMIT,
gt_time_limit_factor: float = DEFAULT_GT_TIME_LIMIT_FACTOR,
)
| 77 | |
| 78 | |
def check_correctness(
    dataset: str,
    completion_id: int,
    problem: Dict[str, Any],
    solution: str,
    expected_output: Dict[str, List],
    base_only=False,
    fast_check=False,
    identifier=None,
    min_time_limit: float = DEFAULT_MIN_TIME_LIMIT,
    gt_time_limit_factor: float = DEFAULT_GT_TIME_LIMIT_FACTOR,
) -> Dict[str, Result]:
    """Check one solution against a problem's base (and optionally plus) inputs.

    The returned dict always carries the bookkeeping entries
    ``"completion_id"``, ``"task_id"``, ``"_identifier"`` and ``"solution"``,
    plus a ``"base"`` entry holding whatever ``untrusted_check`` returns for
    the base inputs. Unless ``base_only`` is truthy, a ``"plus"`` entry is
    added the same way for the plus inputs. Per the original annotation
    comment, the ``"base"``/``"plus"`` values are ``(status, details)`` results.

    NOTE(review): the ``Dict[str, Result]`` annotation only describes the
    ``"base"``/``"plus"`` entries; the bookkeeping entries hold other types.

    Args:
        dataset: Forwarded unchanged as the first argument of ``untrusted_check``.
        completion_id: Stored under ``"completion_id"`` in the result.
        problem: Mapping read for ``"task_id"``, ``"entry_point"``, ``"atol"``,
            ``"base_input"`` and (when plus is checked) ``"plus_input"``.
        solution: Solution text; stored in the result and passed to the checker.
        expected_output: Mapping read for ``"base"``/``"base_time"`` and
            (when plus is checked) ``"plus"``/``"plus_time"``.
        base_only: When truthy, skip the plus check entirely.
        fast_check: Forwarded to ``untrusted_check``.
        identifier: Opaque value stored under ``"_identifier"``.
        min_time_limit: Forwarded to ``untrusted_check``.
        gt_time_limit_factor: Forwarded to ``untrusted_check``.

    Returns:
        The result dict described above.
    """

    def _check_split(split: str, input_key: str, time_key: str):
        # One invocation of the checker for a single split; only the input
        # list, expected outputs and reference times differ between splits.
        return untrusted_check(
            dataset,
            solution,
            problem[input_key],
            problem["entry_point"],
            expected=expected_output[split],
            atol=problem["atol"],
            ref_time=expected_output[time_key],
            fast_check=fast_check,
            min_time_limit=min_time_limit,
            gt_time_limit_factor=gt_time_limit_factor,
        )

    outcome = {
        "completion_id": completion_id,
        "task_id": problem["task_id"],
        "_identifier": identifier,
        "solution": solution,
    }

    outcome["base"] = _check_split("base", "base_input", "base_time")
    if base_only:
        return outcome

    outcome["plus"] = _check_split("plus", "plus_input", "plus_time")
    return outcome
| 125 | |
| 126 | |
| 127 | def evaluate( |
nothing calls this directly
no test coverage detected