Function load_results

benchmark/problem_stats.py:23–59 · view source on GitHub ↗

Load all result files from a benchmark directory

(dirname)

Source from the content-addressed store, hash-verified

21
22
def load_results(dirname):
    """Load all result files from a benchmark directory.

    The directory is used as given; if that path does not exist, the same
    name is tried under ``tmp.benchmarks/``. Result files are searched for at
    ``<language>/exercises/practice/<exercise>/.aider.results.json`` below
    the benchmark directory.

    Args:
        dirname: Path (or string) naming the benchmark directory.

    Returns:
        ``None`` when the directory exists in neither location. Otherwise a
        tuple ``(all_results, parse_errors)`` where ``all_results`` is a list
        of the decoded result dicts (each with a ``"language"`` key added,
        taken from the file's path) and ``parse_errors`` is a list of
        ``"<exercise>/<language>"`` strings, one per unusable result file.
    """
    dirname = Path(dirname)

    benchmark_dir = dirname
    if not benchmark_dir.exists():
        benchmark_dir = Path("tmp.benchmarks") / dirname
        if not benchmark_dir.exists():
            return None

    all_results = []
    parse_errors = []  # Track which exercises had parse errors for this model

    # Look in language subdirectories under exercises/practice. The pattern
    # must be relative: Path.glob() rejects patterns that start with "/".
    for fname in benchmark_dir.glob("*/exercises/practice/*/.aider.results.json"):
        # Path layout: <lang>/exercises/practice/<exercise>/<file>, so the
        # language is five components from the end.
        lang = fname.parts[-5]

        try:
            results = json.loads(fname.read_text())
        except json.JSONDecodeError:
            results = None

        # A usable file is a JSON object carrying a "testcase" key. Anything
        # else (undecodable text, a list, a bare scalar, a dict without the
        # key) is reported as a bad results file rather than raising.
        if isinstance(results, dict) and "testcase" in results:
            results["language"] = lang
            all_results.append(results)
            continue

        # Track the parse error for this exercise/model combination, using
        # the exercise directory name in place of the missing testcase.
        parse_errors.append(f"{fname.parts[-2]}/{lang}")
        print(f"Bad results file {fname}")

    return all_results, parse_errors
60
61
62	def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):

analyze_exercise_solutionsFunction · 0.70

read_textMethod · 0.45

no test coverage detected