Verify whether the solution contains the correct answer, in the expected format, for problems that have a specific answer
(problem_id: int, solution: str)
| 125 | ] |
| 126 | |
| 127 | def verify_answer_format(problem_id: int, solution: str) -> Dict[str, Any]: |
| 128 | """ |
| 129 | Verify if the solution contains the correct answer format for problems with specific answers |
| 130 | """ |
| 131 | result = { |
| 132 | "correct_answer_found": False, |
| 133 | "extracted_answer": None, |
| 134 | "answer_score": 0.0, |
| 135 | "error_message": "" |
| 136 | } |
| 137 | |
| 138 | solution_clean = solution.lower().replace(" ", "").replace("\n", " ") |
| 139 | |
| 140 | if problem_id == 1: |
| 141 | # Expected: {0, 1, 3} |
| 142 | # Look for sets containing 0, 1, 3 |
| 143 | set_patterns = [ |
| 144 | r"\{0,1,3\}", |
| 145 | r"\{0,\s*1,\s*3\}", |
| 146 | r"\{1,0,3\}", |
| 147 | r"\{3,1,0\}", |
| 148 | # Allow other orderings |
| 149 | r"\{[013,\s]+\}" # General pattern |
| 150 | ] |
| 151 | |
| 152 | for pattern in set_patterns: |
| 153 | if re.search(pattern, solution_clean): |
| 154 | # Verify it actually contains exactly 0, 1, 3 |
| 155 | numbers = re.findall(r'\d+', re.search(pattern, solution_clean).group()) |
| 156 | if sorted([int(x) for x in numbers]) == [0, 1, 3]: |
| 157 | result["correct_answer_found"] = True |
| 158 | result["extracted_answer"] = "{0, 1, 3}" |
| 159 | result["answer_score"] = 1.0 |
| 160 | break |
| 161 | |
| 162 | elif problem_id == 3: |
| 163 | # Expected: 4 |
| 164 | # Look for "c = 4" or "constant is 4" etc. |
| 165 | if re.search(r"c\s*=\s*4(?![0-9])", solution) or \ |
| 166 | re.search(r"constant.*4(?![0-9])", solution) or \ |
| 167 | re.search(r"answer.*4(?![0-9])", solution): |
| 168 | result["correct_answer_found"] = True |
| 169 | result["extracted_answer"] = "4" |
| 170 | result["answer_score"] = 1.0 |
| 171 | |
| 172 | elif problem_id == 4: |
| 173 | # Expected: 6J·12^K where gcd(J,10)=1 |
| 174 | # Look for the formula pattern |
| 175 | patterns = [ |
| 176 | r"6j.*12\^k", |
| 177 | r"6.*j.*12\^k", |
| 178 | r"a_1\s*=\s*6.*12", |
| 179 | r"6.*\*.*12\^" |
| 180 | ] |
| 181 | |
| 182 | for pattern in patterns: |
| 183 | if re.search(pattern, solution_clean): |
| 184 | result["correct_answer_found"] = True |
no test coverage detected