| 85 | |
| 86 | |
def get_data(paths: List[PathLike]):
    """Collect per-task success vectors from evaluation result files.

    Each file is parsed as JSON and its ``"eval"`` entry is read as a mapping
    whose values each hold a ``"base"`` and a ``"plus"`` list of
    ``(status, details)`` pairs.

    Args:
        paths: Result files to load, one per experiment.

    Returns:
        A pair ``(task2bvs_old, task2bvs_new)``. Both are lists indexed by
        task position (the iteration order of the ``"eval"`` mapping), and
        entry ``i`` holds one boolean NumPy array per file in ``paths``:

        * ``task2bvs_old[i][k]`` marks which ``"base"`` statuses of task
          ``i`` in file ``k`` equal ``SUCCESS``.
        * ``task2bvs_new[i][k]`` marks the entries whose ``"plus"`` status
          equals ``SUCCESS`` and whose ``"base"`` status does too.

        Both lists are empty when ``paths`` is empty.

    Raises:
        AssertionError: If a file does not contain exactly 164 tasks, or if
            the plus success rate of a task exceeds its base success rate.
    """
    # Start from empty lists (not None) so an empty ``paths`` returns cleanly
    # instead of failing on ``len(None)`` in the final sanity check.
    task2bvs_old = []
    task2bvs_new = []

    for path in tqdm(paths):  # each experiment
        # Context manager guarantees the handle is closed even when parsing
        # fails; JSON is UTF-8, so do not rely on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            res = json.load(f)["eval"]
        ntask = len(res)

        assert ntask == 164, f"{path}: expected 164 tasks, got {ntask}"

        if not task2bvs_old:
            # First file: allocate one bucket per task.
            task2bvs_old = [[] for _ in range(ntask)]
            task2bvs_new = [[] for _ in range(ntask)]

        # NOTE(review): tasks are matched across files by position only;
        # presumably every file lists its tasks in the same order -- confirm.
        for i, v in enumerate(res.values()):  # each task
            base = v["base"]
            plus = v["plus"]
            bbv = np.array([s == SUCCESS for s, _ in base])
            # An entry counts under "plus" only if it also succeeded on
            # "base"; the element-wise AND needs both lists to be equally long.
            pbv = np.array([s == SUCCESS for s, _ in plus]) & bbv
            assert bbv.mean() >= pbv.mean()

            task2bvs_old[i].append(bbv)
            task2bvs_new[i].append(pbv)

    assert len(task2bvs_old) == len(task2bvs_new)
    return task2bvs_old, task2bvs_new
| 114 | |
| 115 | |
| 116 | if __name__ == "__main__": |