(problems, hashcode, tasks_only_output_not_none)
| 40 | |
| 41 | |
def get_groundtruth(problems, hashcode, tasks_only_output_not_none):
    """Return the expected outputs for every problem, using an on-disk cache.

    The cache lives at ``CACHE_DIR/<hashcode>.pkl``. When that file exists its
    unpickled content is returned as-is; otherwise the expected outputs are
    computed with ``trusted_exec``, written to the cache, and returned.

    Args:
        problems: Mapping of task id to a problem dict. Each problem dict is
            read for the keys ``"prompt"``, ``"canonical_solution"``,
            ``"base_input"``, ``"plus_input"`` and ``"entry_point"``.
        hashcode: Value used as the cache file's base name.
        tasks_only_output_not_none: Container of entry-point names; membership
            of a problem's entry point is forwarded to ``trusted_exec`` as
            ``output_not_none``.

    Returns:
        A dict mapping each task id to a dict with the keys ``"base"``,
        ``"base_time"``, ``"plus"`` and ``"plus_time"`` (the two values
        returned by ``trusted_exec`` for the base and plus inputs).
    """
    cache_file = os.path.join(CACHE_DIR, f"{hashcode}.pkl")
    if os.path.exists(cache_file):
        print(f"Load from ground-truth from {cache_file}")
        with open(cache_file, "rb") as f:
            # NOTE(review): unpickling executes arbitrary code; this is only
            # safe as long as CACHE_DIR holds files written by this function.
            return pickle.load(f)

    os.makedirs(CACHE_DIR, exist_ok=True)
    print("Computing expected output...")
    tbegin = time.time()
    expected_output = {}
    for task_id, problem in problems.items():
        # These arguments are identical for the base and plus runs.
        solution = problem["prompt"] + problem["canonical_solution"]
        entry_point = problem["entry_point"]
        output_not_none = entry_point in tasks_only_output_not_none

        oracle = {}
        for split in ("base", "plus"):
            oracle[split], oracle[f"{split}_time"] = trusted_exec(
                solution,
                problem[f"{split}_input"],
                entry_point,
                record_time=True,
                output_not_none=output_not_none,
            )
        expected_output[task_id] = oracle
    print(f"Expected outputs computed in {time.time() - tbegin:.2f}s")

    # Write to a per-process temporary file and rename it into place, so an
    # interrupted run can never leave a truncated cache file that would make
    # every later call fail inside pickle.load.
    tmp_file = f"{cache_file}.{os.getpid()}.tmp"
    try:
        with open(tmp_file, "wb") as f:
            pickle.dump(expected_output, f)
        os.replace(tmp_file, cache_file)
    finally:
        # After a successful replace the temporary file no longer exists;
        # this only cleans up after a failed or interrupted write.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

    return expected_output
| 77 | |
| 78 | |
| 79 | def check_correctness( |
no test coverage detected