MCPcopy
hub / github.com/Codium-ai/AlphaCodium / evaluate_solution_on_subset

Function evaluate_solution_on_subset

alpha_codium/gen/utils.py:48–88  ·  view source on GitHub ↗
(evaluation_test_type, problem, solution, silent=False, break_on_timeout=True)

Source from the content-addressed store, hash-verified

46
47
def evaluate_solution_on_subset(evaluation_test_type, problem, solution, silent=False, break_on_timeout=True):
    """Run ``solution`` on one test subset of ``problem`` and tally the outcomes.

    Args:
        evaluation_test_type: Key of the test subset inside ``problem``
            (``problem[evaluation_test_type]['input']`` is read). When falsy,
            nothing is evaluated.
        problem: Mapping that holds the test subsets; forwarded to
            ``eval_solution`` as ``example``.
        solution: Candidate solution; forwarded to ``eval_solution`` as
            ``prediction``.
        silent: When true, the summary banners are not logged.
        break_on_timeout: Forwarded unchanged to ``eval_solution``.

    Returns:
        A tuple ``(test_results, test_passed, test_failed, test_timeout)``.
        ``test_results`` is the raw value returned by ``eval_solution``, or
        ``None`` when ``evaluation_test_type`` is falsy and nothing was run.
    """
    if not evaluation_test_type:
        # Without a subset name there is nothing to run; return before
        # indexing into results that were never produced.
        if not silent:
            logger.info("=====================================")
            logger.info("No tests")
            logger.info("=====================================")
        return None, 0, 0, 0

    # evaluate solution
    test_results = eval_solution(evaluation_test_type=evaluation_test_type, example=problem, prediction=solution,
                                 silent=silent, break_on_timeout=break_on_timeout)

    # NOTE(review): test_results is indexed as a pair; element 1 is either an
    # empty list or an object exposing `compilation_result` / `test_results`.
    # Confirm against eval_solution.
    if test_results[1] == []:
        if not silent:
            logger.info("=====================================")
            logger.info("No tests")
            logger.info("=====================================")
        return test_results, 0, 0, 0

    if (hasattr(test_results[1], 'compilation_result') and
            test_results[1].compilation_result.program_status.name == 'kTimeout'):
        if not silent:
            logger.info("=====================================")
            logger.info("Timeout")
            logger.info("=====================================")
        # A timeout at this stage is reported as len(test_results[0]) timeouts
        # (presumably one per test in the subset -- TODO confirm).
        return test_results, 0, 0, len(test_results[0])

    test_passed = 0
    test_failed = 0
    test_timeout = 0
    if not problem[evaluation_test_type]['input']:
        # This message is emitted regardless of `silent`.
        logger.info(f"No {evaluation_test_type} for this problem")
    else:
        for test in test_results[1].test_results:
            # A per-test timeout is counted separately from an ordinary failure.
            if (hasattr(test, 'program_status') and test.program_status.name == 'kTimeout'):
                test_timeout += 1
            elif not test.passed:
                test_failed += 1
            else:
                test_passed += 1
        if not silent:
            logger.info("=====================================")
            logger.info(f"test_passed: {test_passed}, test_failed: {test_failed}, test_timeout: {test_timeout}")
            logger.info("=====================================")

    return test_results, test_passed, test_failed, test_timeout
89
90
91def evaluate_on_private_tests(evaluation_test_type, problem, solution, silent=True):

Callers 4

solve_problemFunction · 0.90
solve_my_problemFunction · 0.90
solve_datasetFunction · 0.90
calc_is_valid_problemFunction · 0.90

Calls 1

eval_solutionFunction · 0.90

Tested by

no test coverage detected