(pred_str, data_name, use_last_number=True)
| 497 | |
| 498 | |
| 499 | def extract_answer(pred_str, data_name, use_last_number=True): |
| 500 | pred_str = pred_str.replace("\u043a\u0438", "") |
| 501 | if data_name in ["mmlu_stem", "sat_math", "aqua", "gaokao2023"]: |
| 502 | # TODO check multiple choice |
| 503 | return choice_answer_clean(pred_str) |
| 504 | |
| 505 | if "final answer is $" in pred_str and "$. I hope" in pred_str: |
| 506 | # minerva_math |
| 507 | tmp = pred_str.split("final answer is $", 1)[1] |
| 508 | pred = tmp.split("$. I hope", 1)[0].strip() |
| 509 | elif "boxed" in pred_str: |
| 510 | ans = pred_str.split("boxed")[-1] |
| 511 | if len(ans) == 0: |
| 512 | return "" |
| 513 | elif ans[0] == "{": |
| 514 | stack = 1 |
| 515 | a = "" |
| 516 | for c in ans[1:]: |
| 517 | if c == "{": |
| 518 | stack += 1 |
| 519 | a += c |
| 520 | elif c == "}": |
| 521 | stack -= 1 |
| 522 | if stack == 0: |
| 523 | break |
| 524 | a += c |
| 525 | else: |
| 526 | a += c |
| 527 | else: |
| 528 | a = ans.split("$")[0].strip() |
| 529 | pred = a |
| 530 | elif "he answer is" in pred_str: |
| 531 | pred = pred_str.split("he answer is")[-1].strip() |
| 532 | elif "final answer is" in pred_str: |
| 533 | pred = pred_str.split("final answer is")[-1].strip() |
| 534 | elif "答案是" in pred_str: |
| 535 | # Handle Chinese few-shot multiple choice problem answer extraction |
| 536 | pred = pred_str.split("答案是")[1].strip().split("\n\n")[0].strip() |
| 537 | else: # use the last number |
| 538 | if use_last_number: |
| 539 | pattern = "-?\d*\.?\d+" |
| 540 | pred = re.findall(pattern, pred_str.replace(",", "")) |
| 541 | if len(pred) >= 1: |
| 542 | pred = pred[-1] |
| 543 | else: |
| 544 | pred = "" |
| 545 | else: |
| 546 | pred = "" |
| 547 | |
| 548 | # choice answer |
| 549 | if ( |
| 550 | data_name in ["sat_math", "aqua"] |
| 551 | or "mmlu" in data_name |
| 552 | ): |
| 553 | tmp = re.findall(r"\b(A|B|C|D|E)\b", pred.upper()) |
| 554 | if tmp: |
| 555 | pred = tmp[-1] |
| 556 | else: |
no test coverage detected