Compare the answer (from the Agent) with the label (ground truth). The label can be either a string or a number. If the label is a number, a 10% margin is allowed. Otherwise, best-effort string matching is used.
(answer: str, label: str)
| 61 | |
| 62 | |
def compare_answer(answer: str, label: str) -> bool:
    """Compare the answer (from Agent) against the label (ground truth).

    The label can be either a string or a number. When ``is_number(label)``
    is true, both values are converted to ``float`` and the answer is
    accepted if it equals the label exactly or lies strictly inside the
    10% margin around it (the +/-10% boundaries themselves are excluded).
    Otherwise both values are passed through ``normalize_answer`` and
    compared for equality (best-effort string matching).

    Args:
        answer: The agent's answer. ``None`` is treated as "no answer".
        label: The ground-truth value, numeric or free text.

    Returns:
        True if the answer matches the label under the rules above,
        False otherwise (including when ``answer`` is ``None`` or cannot
        be converted to a float for a numeric label).
    """
    if answer is None:
        return False

    # See if label is a number, e.g. "1.0" or "1".
    if not is_number(label):
        normalized_label = normalize_answer(label)
        normalized_answer = normalize_answer(answer)
        return normalized_answer == normalized_label

    expected = float(label)
    # A numeric label requires a numeric answer; anything that cannot be
    # converted is a mismatch rather than an error.
    try:
        predicted = float(answer)
    except (TypeError, ValueError, OverflowError):
        return False

    # An exact match always counts. This matters for a label of 0, whose
    # 10% margin is an empty interval.
    if predicted == expected:
        return True

    # Order the bounds so the margin also works for negative labels, where
    # label * 0.9 is greater than label * 1.1.
    lower, upper = sorted((expected * 0.9, expected * 1.1))
    return lower < predicted < upper
no test coverage detected