MCPcopy
hub / github.com/Codium-ai/AlphaCodium / solve_dataset

Function solve_dataset

alpha_codium/gen/dataset_solver.py:13–117  ·  view source on GitHub ↗
(dataset_name='valid_and_test_processed',
                  split_name='valid',
                  database_solution_path='solution_database.json')

Source from the content-addressed store, hash-verified

11
12
13def solve_dataset(dataset_name='valid_and_test_processed',
14 split_name='valid',
15 database_solution_path='solution_database.json'):
16
17 # load dataset
18 data_provider = CodeContestDataProvider(dataset_location=dataset_name)
19 setting = get_settings()
20 num_problems = len(data_provider.dataset[split_name])
21 base_path = os.getcwd()
22 setting.solve.reduce_verbose = True
23
24 ## load previous solution-database if exists
25 try:
26 with open(database_solution_path, 'r') as f:
27 database = json.load(f)
28 database[split_name] = OrderedDict(sorted(database[split_name].items(), key=lambda x: int(x[0])))
29 except:
30 print(f"Failed to load database from {database_solution_path}")
31 database = {split_name: {}}
32
33 # iterate on problems
34 for problem_number in range(0, num_problems):
35
36 # skip if already ran
37 logger = setup_logger()
38
39 num_iterations = setting.get("dataset.num_iterations", 1)
40 prev = database[split_name].get(str(problem_number), {}).get(f'iteration_{num_iterations-1}', {})
41 if not ((prev == {}) or (prev is None)):
42 print(f"problem_number {problem_number} already ran")
43 continue
44
45 # check if problem is valid (at least one of the provided solutions actually passes the generated tests)
46 if data_provider.dataset[split_name][problem_number].get('is_valid_problem', True) is False:
47 logger.info(f"problem {problem_number} is not valid")
48 continue
49
50 os.chdir(base_path)
51 logger.info(f"problem_number: {problem_number}")
52 problem_name = data_provider.dataset[split_name][int(problem_number)]['name']
53 logger.info(f"problem_name: {problem_name}")
54 problem = data_provider.find_problem(ds=data_provider.dataset, problem_name=problem_name, split_name=split_name)
55 logger.info(f"problem['cf_tags']: {problem['cf_tags']}")
56
57 # solve problem
58 problem_database = {problem_number: {}}
59 solver = CodeContestsCompetitor()
60 for iteration in range(setting.get("dataset.num_iterations", 1)):
61 it_str = f"iteration_{iteration}"
62 problem_database[problem_number][it_str] = {}
63
64 # skip if iteration already ran
65 prev_iter = database[split_name].get(str(problem_number), {}).get(it_str, {})
66 if not ((prev_iter == {}) or (prev_iter is None)):
67 print(f"prev_iter {iteration} already ran")
68 problem_database[problem_number][it_str] = prev_iter
69 if is_solved(prev_iter):
70 logger.info(f"codium solved problem {problem_number} in iteration {iteration}")

Callers 1

solve_dataset.py · File · 0.90

Calls 8

find_problem · Method · 0.95
get_settings · Function · 0.90
setup_logger · Function · 0.90
is_solved · Function · 0.85

Tested by

no test coverage detected