Function get_groundtruth

evalplus/evaluate.py:42–76 · view source on GitHub ↗

(problems, hashcode, tasks_only_output_not_none)

Source from the content-addressed store, hash-verified

40
41
def get_groundtruth(problems, hashcode, tasks_only_output_not_none):
    """Return the expected outputs for every problem, using an on-disk cache.

    If ``CACHE_DIR/<hashcode>.pkl`` exists it is unpickled and returned
    as-is. Otherwise the canonical solution of each problem is run through
    ``trusted_exec`` on both its base and plus inputs, and the result is
    pickled to that path before being returned.

    Args:
        problems: Mapping of task id to a problem dict. Each problem dict is
            read for the keys ``"prompt"``, ``"canonical_solution"``,
            ``"base_input"``, ``"plus_input"`` and ``"entry_point"``.
        hashcode: String used as the cache file's base name.
        tasks_only_output_not_none: Container of entry-point names; membership
            of a problem's entry point is forwarded to ``trusted_exec`` as
            ``output_not_none``.

    Returns:
        Dict mapping each task id to a dict with the keys ``"base"``,
        ``"base_time"``, ``"plus"`` and ``"plus_time"``, holding the two
        values returned by ``trusted_exec`` for each input set.
    """
    cache_file = os.path.join(CACHE_DIR, f"{hashcode}.pkl")
    if os.path.exists(cache_file):
        print(f"Load from ground-truth from {cache_file}")
        # NOTE(security): unpickling executes arbitrary code; CACHE_DIR must
        # only ever contain files written by this function.
        with open(cache_file, "rb") as f:
            return pickle.load(f)

    os.makedirs(CACHE_DIR, exist_ok=True)
    print("Computing expected output...")
    tbegin = time.time()
    expected_output = {}
    for task_id, problem in problems.items():
        # Both input sets are run against the same program with the same flag.
        solution = problem["prompt"] + problem["canonical_solution"]
        entry_point = problem["entry_point"]
        output_not_none = entry_point in tasks_only_output_not_none

        oracle = {}
        for split in ("base", "plus"):
            oracle[split], oracle[f"{split}_time"] = trusted_exec(
                solution,
                problem[f"{split}_input"],
                entry_point,
                record_time=True,
                output_not_none=output_not_none,
            )
        expected_output[task_id] = oracle
    print(f"Expected outputs computed in {time.time() - tbegin:.2f}s")

    # Write to a sibling temp file and rename it into place, so an interrupted
    # dump can never leave a truncated cache file that later loads would trip
    # over. The pid suffix keeps concurrent writers from sharing a temp file.
    tmp_file = f"{cache_file}.{os.getpid()}.tmp"
    try:
        with open(tmp_file, "wb") as f:
            pickle.dump(expected_output, f)
        os.replace(tmp_file, cache_file)
    finally:
        # Only still present if the dump or the rename failed.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

    return expected_output
77
78
79	def check_correctness(

scriptFunction · 0.90

mainFunction · 0.90

get_evalplus_dataFunction · 0.90

evaluateFunction · 0.85

trusted_execFunction · 0.90

no test coverage detected