Load all result files from a benchmark directory
(dirname)
| 21 | |
| 22 | |
def load_results(dirname):
    """Load all result files from a benchmark directory.

    The directory is tried as given first and, if that path does not exist,
    again underneath ``tmp.benchmarks/``. Result files are searched for at
    ``<language>/exercises/practice/<exercise>/.aider.results.json`` below
    the benchmark directory.

    Args:
        dirname: Benchmark directory, as a ``str`` or ``Path``.

    Returns:
        ``None`` when the benchmark directory cannot be found in either
        location. Otherwise a tuple ``(all_results, parse_errors)``:

        * ``all_results`` -- list of the parsed result dicts, each with a
          ``"language"`` key added from the file's path.
        * ``parse_errors`` -- list of ``"<exercise>/<language>"`` strings,
          one per result file that could not be decoded, was not valid
          JSON, was not a JSON object, or lacked a ``"testcase"`` key.

        Files are processed in sorted path order so both lists are
        deterministic across filesystems.
    """
    dirname = Path(dirname)

    benchmark_dir = dirname
    if not benchmark_dir.exists():
        # Fall back to the conventional benchmark output location.
        benchmark_dir = Path("tmp.benchmarks") / dirname
        if not benchmark_dir.exists():
            return None

    all_results = []
    parse_errors = []  # Exercises whose results file was unusable

    # Look in language subdirectories under exercises/practice
    pattern = "*/exercises/practice/*/.aider.results.json"
    for fname in sorted(benchmark_dir.glob(pattern)):
        # .../<lang>/exercises/practice/<exercise>/.aider.results.json
        lang = fname.parts[-5]

        try:
            results = json.loads(fname.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            results = None

        # Valid JSON that is not an object (e.g. a number or a list) is just
        # as unusable as malformed JSON; treat it as a bad file rather than
        # letting the membership test or item assignment raise TypeError.
        if not isinstance(results, dict) or "testcase" not in results:
            # Use the exercise directory name as the testcase identifier.
            parse_errors.append(f"{fname.parts[-2]}/{lang}")
            print(f"Bad results file {fname}")
            continue

        results["language"] = lang
        all_results.append(results)

    return all_results, parse_errors
| 60 | |
| 61 | |
| 62 | def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False): |
no test coverage detected