hub / github.com/evalplus/evalplus / check_correctness

Function check_correctness

evalplus/evaluate.py:79–124 · view source on GitHub ↗

(
    dataset: str,
    completion_id: int,
    problem: Dict[str, Any],
    solution: str,
    expected_output: Dict[str, List],
    base_only=False,
    fast_check=False,
    identifier=None,
    min_time_limit: float = DEFAULT_MIN_TIME_LIMIT,
    gt_time_limit_factor: float = DEFAULT_GT_TIME_LIMIT_FACTOR,
)

Source from the content-addressed store, hash-verified

77
78
def check_correctness(
    dataset: str,
    completion_id: int,
    problem: Dict[str, Any],
    solution: str,
    expected_output: Dict[str, List],
    base_only=False,
    fast_check=False,
    identifier=None,
    min_time_limit: float = DEFAULT_MIN_TIME_LIMIT,
    gt_time_limit_factor: float = DEFAULT_GT_TIME_LIMIT_FACTOR,
) -> Dict[str, Result]:
    """Check one solution against a problem's base (and optionally plus) tests.

    Each test suite is delegated to ``untrusted_check``; this function only
    selects the inputs, expected outputs and reference times for each suite
    and gathers the outcomes into a single record.

    Args:
        dataset: Forwarded unchanged to ``untrusted_check``.
        completion_id: Stored in the returned record as ``"completion_id"``.
        problem: Must provide ``"task_id"``, ``"entry_point"``, ``"atol"``
            and ``"base_input"``; ``"plus_input"`` is also read unless
            ``base_only`` is true.
        solution: Code under test; forwarded to ``untrusted_check`` and
            echoed back in the record as ``"solution"``.
        expected_output: Must provide ``"base"`` and ``"base_time"``;
            ``"plus"`` and ``"plus_time"`` are also read unless
            ``base_only`` is true.
        base_only: When true, the plus suite is skipped and the record has
            no ``"plus"`` key.
        fast_check: Forwarded unchanged to ``untrusted_check``.
        identifier: Stored in the returned record as ``"_identifier"``.
        min_time_limit: Forwarded unchanged to ``untrusted_check``.
        gt_time_limit_factor: Forwarded unchanged to ``untrusted_check``.

    Returns:
        A dict holding ``"completion_id"``, ``"task_id"``, ``"_identifier"``
        and ``"solution"``, plus one entry per executed suite (``"base"``
        and, unless ``base_only``, ``"plus"``) containing whatever
        ``untrusted_check`` returned -- presumably a (status, details)
        pair; confirm against ``untrusted_check``.
    """
    ret = {
        "completion_id": completion_id,
        "task_id": problem["task_id"],
        "_identifier": identifier,
        "solution": solution,
    }

    # (result key / expected-output key, problem input key, reference-time key)
    suites = [("base", "base_input", "base_time")]
    if not base_only:
        suites.append(("plus", "plus_input", "plus_time"))

    # The base suite always runs first, so "base" precedes "plus" in the record.
    for suite, input_key, time_key in suites:
        ret[suite] = untrusted_check(
            dataset,
            solution,
            problem[input_key],
            problem["entry_point"],
            expected=expected_output[suite],
            atol=problem["atol"],
            ref_time=expected_output[time_key],
            fast_check=fast_check,
            min_time_limit=min_time_limit,
            gt_time_limit_factor=gt_time_limit_factor,
        )

    return ret
125
126
127	def evaluate(

Callers

nothing calls this directly

Calls 1

untrusted_checkFunction · 0.90

Tested by

no test coverage detected