Compare predicted answer with ground truth. Uses both exact match and semantic equivalence.
(predicted: str, ground_truth: str)
| 89 | |
| 90 | |
def _parse_number(text: str) -> float:
    """Parse the numeric content of *text* as a float.

    Every character other than digits, ``.`` and ``-`` is dropped (so
    ``"1,000"`` becomes ``"1000"`` and ``"c = 4"`` becomes ``"4"``), then
    trailing periods are removed so a sentence-ending dot (``"3.5."``) does
    not make the conversion fail.

    Raises:
        ValueError: if the remaining characters do not form a valid float.
        TypeError: if *text* is not a string.
    """
    digits = re.sub(r'[^0-9.-]', '', text).rstrip('.')
    return float(digits)


def compare_answers(predicted: str, ground_truth: str) -> bool:
    """
    Compare predicted answer with ground truth.

    The answers are considered equal when any of the following holds:

    1. they are identical after ``normalize_answer``;
    2. one normalized answer is a substring of the other
       (for cases like "4" in "c = 4");
    3. the numeric content of the raw answers differs by less than 1e-6.

    Args:
        predicted: The answer produced by the model.
        ground_truth: The reference answer.

    Returns:
        True if the answers match under any rule above, False otherwise.
        Always False when either argument is empty or None.
    """
    if not predicted or not ground_truth:
        return False

    # Normalize both answers
    pred_norm = normalize_answer(predicted)
    truth_norm = normalize_answer(ground_truth)

    # Exact match after normalization
    if pred_norm == truth_norm:
        return True

    # Check if one contains the other (for cases like "4" in "c = 4").
    # Both sides must be non-empty: "" is a substring of every string, so an
    # answer that normalizes to nothing would otherwise match anything.
    # NOTE(review): plain substring matching is deliberately loose and also
    # accepts e.g. "4" inside "14" -- confirm this is acceptable for scoring.
    if pred_norm and truth_norm:
        if pred_norm in truth_norm or truth_norm in pred_norm:
            return True

    # Try numeric comparison if possible; answers without parseable numeric
    # content simply skip this rule.
    try:
        pred_num = _parse_number(predicted)
        truth_num = _parse_number(ground_truth)
    except (ValueError, TypeError):
        return False

    return abs(pred_num - truth_num) < 1e-6
| 121 | |
| 122 | |
| 123 | def extract_answer_from_solution(solution: str, problem_id: str = None) -> str: |
no test coverage detected