(evaluation_test_type, problem, solution, silent=False, break_on_timeout=True)
| 46 | |
| 47 | |
def evaluate_solution_on_subset(evaluation_test_type, problem, solution, silent=False, break_on_timeout=True):
    """Run ``solution`` against one test subset of ``problem`` and tally the outcomes.

    Args:
        evaluation_test_type: Key selecting the test subset inside ``problem``
            (``problem[evaluation_test_type]['input']`` is read below). A falsy
            value means no evaluation is requested.
        problem: Problem record; forwarded to ``eval_solution`` as ``example``.
        solution: Candidate solution; forwarded to ``eval_solution`` as
            ``prediction``.
        silent: When true, the summary banners are not logged. Also forwarded
            to ``eval_solution``.
        break_on_timeout: Forwarded unchanged to ``eval_solution``.

    Returns:
        A 4-tuple ``(test_results, test_passed, test_failed, test_timeout)``.
        ``test_results`` is the value returned by ``eval_solution``, or
        ``None`` when ``evaluation_test_type`` is falsy and nothing was run.
    """
    if not evaluation_test_type:
        # Nothing was evaluated, so there is no result to inspect. Without
        # this guard the indexing below would raise TypeError on None.
        return None, 0, 0, 0

    test_results = eval_solution(evaluation_test_type=evaluation_test_type, example=problem,
                                 prediction=solution, silent=silent,
                                 break_on_timeout=break_on_timeout)
    run_output = test_results[1]

    # An empty list in the second slot means no tests were executed.
    if run_output == []:
        if not silent:
            logger.info("=====================================")
            logger.info("No tests")
            logger.info("=====================================")
        return test_results, 0, 0, 0

    # The run as a whole timed out: report len(test_results[0]) as the
    # timeout count and no passes or failures.
    if (hasattr(run_output, 'compilation_result') and
            run_output.compilation_result.program_status.name == 'kTimeout'):
        if not silent:
            logger.info("=====================================")
            logger.info("Timeout")
            logger.info("=====================================")
        return test_results, 0, 0, len(test_results[0])

    test_passed = 0
    test_failed = 0
    test_timeout = 0
    if not problem[evaluation_test_type]['input']:
        # NOTE(review): unlike the other messages, this one is logged even
        # when ``silent`` is set - confirm whether that is intended.
        logger.info(f"No {evaluation_test_type} for this problem")
    else:
        for test in run_output.test_results:
            # A per-test timeout takes precedence over pass/fail.
            if hasattr(test, 'program_status') and test.program_status.name == 'kTimeout':
                test_timeout += 1
            elif not test.passed:
                test_failed += 1
            else:
                test_passed += 1
        if not silent:
            logger.info("=====================================")
            logger.info(f"test_passed: {test_passed}, test_failed: {test_failed}, test_timeout: {test_timeout}")
            logger.info("=====================================")

    return test_results, test_passed, test_failed, test_timeout
| 89 | |
| 90 | |
| 91 | def evaluate_on_private_tests(evaluation_test_type, problem, solution, silent=True): |
no test coverage detected