MCPcopy Index your code
hub / github.com/evalplus/evalplus / check_correctness

Function check_correctness

evalplus/evaluate.py:79–124  ·  view source on GitHub ↗
(
    dataset: str,
    completion_id: int,
    problem: Dict[str, Any],
    solution: str,
    expected_output: Dict[str, List],
    base_only=False,
    fast_check=False,
    identifier=None,
    min_time_limit: float = DEFAULT_MIN_TIME_LIMIT,
    gt_time_limit_factor: float = DEFAULT_GT_TIME_LIMIT_FACTOR,
)

Source from the content-addressed store, hash-verified

77
78
def check_correctness(
    dataset: str,
    completion_id: int,
    problem: Dict[str, Any],
    solution: str,
    expected_output: Dict[str, List],
    base_only=False,
    fast_check=False,
    identifier=None,
    min_time_limit: float = DEFAULT_MIN_TIME_LIMIT,
    gt_time_limit_factor: float = DEFAULT_GT_TIME_LIMIT_FACTOR,
) -> Dict[str, Result]:  # {...}, "base" | "plus" -> (status, details)
    """Check one solution against a problem's base (and optionally plus) inputs.

    The returned dict always carries the bookkeeping fields
    ``completion_id``, ``task_id`` (read from ``problem``), ``_identifier``
    and ``solution``, plus a ``"base"`` entry holding whatever
    ``untrusted_check`` returns for the base inputs. Unless ``base_only`` is
    truthy, a ``"plus"`` entry is added the same way for the plus inputs.

    Args:
        dataset: Forwarded unchanged to ``untrusted_check``.
        completion_id: Stored in the result under ``"completion_id"``.
        problem: Must provide ``"task_id"``, ``"base_input"``,
            ``"entry_point"`` and ``"atol"``; ``"plus_input"`` is also read
            when the plus check runs.
        solution: The code under test; forwarded to ``untrusted_check`` and
            echoed back in the result.
        expected_output: Must provide ``"base"`` and ``"base_time"``;
            ``"plus"`` and ``"plus_time"`` are also read when the plus
            check runs.
        base_only: When truthy, skip the plus check entirely.
        fast_check: Forwarded unchanged to ``untrusted_check``.
        identifier: Stored in the result under ``"_identifier"``.
        min_time_limit: Forwarded unchanged to ``untrusted_check``.
        gt_time_limit_factor: Forwarded unchanged to ``untrusted_check``.

    Returns:
        The result dict described above.
    """

    def _run_suite(inputs_key, expected_key, time_key):
        # Both suites share every argument except which inputs, expected
        # outputs and reference times are selected.
        return untrusted_check(
            dataset,
            solution,
            problem[inputs_key],
            problem["entry_point"],
            expected=expected_output[expected_key],
            atol=problem["atol"],
            ref_time=expected_output[time_key],
            fast_check=fast_check,
            min_time_limit=min_time_limit,
            gt_time_limit_factor=gt_time_limit_factor,
        )

    outcome = {
        "completion_id": completion_id,
        "task_id": problem["task_id"],
        "_identifier": identifier,
        "solution": solution,
    }
    outcome["base"] = _run_suite("base_input", "base", "base_time")

    if base_only:
        return outcome

    outcome["plus"] = _run_suite("plus_input", "plus", "plus_time")
    return outcome
125
126
127def evaluate(

Callers

nothing calls this directly

Calls 1

untrusted_check · Function · 0.90

Tested by

no test coverage detected