MCPcopy Index your code
hub / github.com/evalplus/evalplus / get_groundtruth

Function get_groundtruth

evalplus/evaluate.py:42–76  ·  view source on GitHub ↗
(problems, hashcode, tasks_only_output_not_none)

Source from the content-addressed store, hash-verified

40
41
def get_groundtruth(problems, hashcode, tasks_only_output_not_none):
    """Return the expected outputs for every problem, using an on-disk cache.

    The cache lives at ``CACHE_DIR/<hashcode>.pkl``. When that file exists and
    can be unpickled, its content is returned as-is. Otherwise the expected
    outputs are computed with ``trusted_exec`` and written back to the cache.

    Args:
        problems: Mapping of task id to a problem dict. Each problem dict is
            read at the keys ``"prompt"``, ``"canonical_solution"``,
            ``"base_input"``, ``"plus_input"`` and ``"entry_point"``.
        hashcode: String used as the cache file stem.
        tasks_only_output_not_none: Container of entry-point names; membership
            is forwarded to ``trusted_exec`` as ``output_not_none``.

    Returns:
        Dict mapping each task id to a dict with the keys ``"base"``,
        ``"base_time"``, ``"plus"`` and ``"plus_time"``, filled from the two
        values returned by each ``trusted_exec`` call (presumably outputs and
        per-input runtimes -- confirm against ``trusted_exec``).
    """
    cache_file = os.path.join(CACHE_DIR, f"{hashcode}.pkl")
    if os.path.exists(cache_file):
        print(f"Load from ground-truth from {cache_file}")
        # NOTE: pickle.load can execute arbitrary code; the cache directory
        # must only be writable by trusted users.
        try:
            with open(cache_file, "rb") as f:
                return pickle.load(f)
        except (EOFError, pickle.UnpicklingError):
            # A truncated cache (e.g. from an interrupted earlier run) should
            # not make every later run fail; fall through and rebuild it.
            print(f"Cache file {cache_file} is corrupted; recomputing...")

    os.makedirs(CACHE_DIR, exist_ok=True)
    print("Computing expected output...")
    tbegin = time.time()
    expected_output = {}
    for task_id, problem in problems.items():
        # Both runs use the same reference program and the same flag.
        solution = problem["prompt"] + problem["canonical_solution"]
        entry_point = problem["entry_point"]
        output_not_none = entry_point in tasks_only_output_not_none

        oracle = {}
        oracle["base"], oracle["base_time"] = trusted_exec(
            solution,
            problem["base_input"],
            entry_point,
            record_time=True,
            output_not_none=output_not_none,
        )
        oracle["plus"], oracle["plus_time"] = trusted_exec(
            solution,
            problem["plus_input"],
            entry_point,
            record_time=True,
            output_not_none=output_not_none,
        )
        expected_output[task_id] = oracle
    print(f"Expected outputs computed in {time.time() - tbegin:.2f}s")

    # Write to a per-process temp file and rename it into place, so a crash or
    # a concurrent run never leaves a half-written cache file behind.
    tmp_file = f"{cache_file}.{os.getpid()}.tmp"
    try:
        with open(tmp_file, "wb") as f:
            pickle.dump(expected_output, f)
        os.replace(tmp_file, cache_file)
    except BaseException:
        try:
            os.remove(tmp_file)
        except OSError:
            pass
        raise

    return expected_output
77
78
79def check_correctness(

Callers 5

scriptFunction · 0.90
mainFunction · 0.90
mainFunction · 0.90
get_evalplus_dataFunction · 0.90
evaluateFunction · 0.85

Calls 1

trusted_execFunction · 0.90

Tested by

no test coverage detected