(dataset_name='valid_and_test_processed',
split_name='valid',
database_solution_path='solution_database.json')
| 11 | |
| 12 | |
| 13 | def solve_dataset(dataset_name='valid_and_test_processed', |
| 14 | split_name='valid', |
| 15 | database_solution_path='solution_database.json'): |
| 16 | |
| 17 | # load dataset |
| 18 | data_provider = CodeContestDataProvider(dataset_location=dataset_name) |
| 19 | setting = get_settings() |
| 20 | num_problems = len(data_provider.dataset[split_name]) |
| 21 | base_path = os.getcwd() |
| 22 | setting.solve.reduce_verbose = True |
| 23 | |
| 24 | ## load previous solution-database if exists |
| 25 | try: |
| 26 | with open(database_solution_path, 'r') as f: |
| 27 | database = json.load(f) |
| 28 | database[split_name] = OrderedDict(sorted(database[split_name].items(), key=lambda x: int(x[0]))) |
| 29 | except: |
| 30 | print(f"Failed to load database from {database_solution_path}") |
| 31 | database = {split_name: {}} |
| 32 | |
| 33 | # iterate on problems |
| 34 | for problem_number in range(0, num_problems): |
| 35 | |
| 36 | # skip if already ran |
| 37 | logger = setup_logger() |
| 38 | |
| 39 | num_iterations = setting.get("dataset.num_iterations", 1) |
| 40 | prev = database[split_name].get(str(problem_number), {}).get(f'iteration_{num_iterations-1}', {}) |
| 41 | if not ((prev == {}) or (prev is None)): |
| 42 | print(f"problem_number {problem_number} already ran") |
| 43 | continue |
| 44 | |
| 45 | # check if problem is valid (at least one of the provided solutions actually passes the generated tests) |
| 46 | if data_provider.dataset[split_name][problem_number].get('is_valid_problem', True) is False: |
| 47 | logger.info(f"problem {problem_number} is not valid") |
| 48 | continue |
| 49 | |
| 50 | os.chdir(base_path) |
| 51 | logger.info(f"problem_number: {problem_number}") |
| 52 | problem_name = data_provider.dataset[split_name][int(problem_number)]['name'] |
| 53 | logger.info(f"problem_name: {problem_name}") |
| 54 | problem = data_provider.find_problem(ds=data_provider.dataset, problem_name=problem_name, split_name=split_name) |
| 55 | logger.info(f"problem['cf_tags']: {problem['cf_tags']}") |
| 56 | |
| 57 | # solve problem |
| 58 | problem_database = {problem_number: {}} |
| 59 | solver = CodeContestsCompetitor() |
| 60 | for iteration in range(setting.get("dataset.num_iterations", 1)): |
| 61 | it_str = f"iteration_{iteration}" |
| 62 | problem_database[problem_number][it_str] = {} |
| 63 | |
| 64 | # skip if iteration already ran |
| 65 | prev_iter = database[split_name].get(str(problem_number), {}).get(it_str, {}) |
| 66 | if not ((prev_iter == {}) or (prev_iter is None)): |
| 67 | print(f"prev_iter {iteration} already ran") |
| 68 | problem_database[problem_number][it_str] = prev_iter |
| 69 | if is_solved(prev_iter): |
| 70 | logger.info(f"codium solved problem {problem_number} in iteration {iteration}") |
no test coverage detected