(flags)
| 183 | |
| 184 | |
def main(flags):
    """Compute the min set covers and write either a report or the mini set.

    Reads ``flags.report_dir``, ``flags.model``, ``flags.dataset``,
    ``flags.sample_eval_dir`` and ``flags.mini_path``.

    * When ``flags.model`` is anything other than ``"ALL"``, a JSON report
      with the keys ``coverage``, ``mutation``, ``sample`` and ``full`` is
      written to ``<report_dir>/report_<model>.json``.
    * When ``flags.model`` is ``"ALL"``, the merged set cover is passed to
      ``dump_humaneval_plus_mini`` together with ``flags.mini_path``.

    The set covers themselves are computed in both cases.
    """
    coverage_dir = os.path.join(flags.report_dir, "coverage_cache")
    mutation_dir = os.path.join(flags.report_dir, "mutation_cache")
    sample_dir = os.path.join(flags.report_dir, "sample_cache")
    os.makedirs(flags.report_dir, exist_ok=True)

    # Expected name format: model_name + "-" + parameter size (ending in "b").
    # Strip only the trailing size segment. The previous
    # "".join(name.split("-")[:-1]) also removed every remaining hyphen
    # ("code-llama-7b" -> "codellama") and reduced a hyphen-less name ending
    # in "b" to the empty string.
    exclude_model: str = flags.model
    if exclude_model.endswith("b"):
        exclude_model = exclude_model.rsplit("-", 1)[0]

    coverage_set_cover = get_coverage_set_cover(
        coverage_dir, exclude_model, flags.dataset
    )
    mutation_set_cover = get_mutation_set_cover(
        mutation_dir, exclude_model, flags.dataset
    )
    sample_set_cover = get_sample_set_cover(
        sample_dir, flags.sample_eval_dir, exclude_model, flags.dataset
    )
    merged_set_cover = merge_set_cover(
        coverage_set_cover, mutation_set_cover, sample_set_cover
    )

    if flags.model != "ALL":
        # Stages 1-3 report on each individual min set cover; stage 4 ("full")
        # reports on the merged one. Tuple order fixes both the evaluation
        # order and the key order in the emitted JSON.
        stages = (
            ("coverage", coverage_set_cover),
            ("mutation", mutation_set_cover),
            ("sample", sample_set_cover),
            ("full", merged_set_cover),
        )
        final_report = dict()
        for stage_name, set_cover in stages:
            final_report[stage_name] = gen_report(
                set_cover, flags.sample_eval_dir, flags.model
            )
        report_path = os.path.join(
            flags.report_dir, f"report_{flags.model}.json"
        )
        with open(report_path, "w") as f:
            json.dump(final_report, f, indent=4)
    else:
        dump_humaneval_plus_mini(merged_set_cover, flags.mini_path)
| 232 | |
| 233 | |
| 234 | if __name__ == "__main__": |
no test coverage detected