(example: Dict[str, Any], data_name)
| 573 | |
| 574 | |
| 575 | def parse_ground_truth(example: Dict[str, Any], data_name): |
| 576 | if "gt_cot" in example and "gt" in example: |
| 577 | if data_name in ["math"]: |
| 578 | gt_ans = extract_answer(example["gt_cot"], data_name) |
| 579 | elif data_name in STRIP_EXCEPTIONS: |
| 580 | gt_ans = example["gt"] |
| 581 | else: |
| 582 | gt_ans = strip_string(example["gt"]) |
| 583 | return example["gt_cot"], gt_ans |
| 584 | |
| 585 | # No precomputed "gt_cot"/"gt" pair in the example: derive the ground truth from dataset-specific fields |
| 586 | if data_name in ["math", "minerva_math"]: |
| 587 | gt_cot = example["solution"] |
| 588 | gt_ans = extract_answer(gt_cot, data_name) |
| 589 | elif data_name == "gsm8k": |
| 590 | gt_cot, gt_ans = example["answer"].split("####") |
| 591 | elif data_name == "svamp": |
| 592 | gt_cot, gt_ans = example["Equation"], example["Answer"] |
| 593 | elif data_name == "asdiv": |
| 594 | gt_cot = example["formula"] |
| 595 | gt_ans = re.sub(r"\(.*?\)", "", example["answer"]) |
| 596 | elif data_name == "mawps": |
| 597 | gt_cot, gt_ans = None, example["target"] |
| 598 | elif data_name == "tabmwp": |
| 599 | gt_cot = example["solution"] |
| 600 | gt_ans = example["answer"] |
| 601 | if example["ans_type"] in ["integer_number", "decimal_number"]: |
| 602 | if "/" in gt_ans: |
| 603 | gt_ans = int(gt_ans.split("/")[0]) / int(gt_ans.split("/")[1]) |
| 604 | elif "," in gt_ans: |
| 605 | gt_ans = float(gt_ans.replace(",", "")) |
| 606 | elif "%" in gt_ans: |
| 607 | gt_ans = float(gt_ans.split("%")[0]) / 100 |
| 608 | else: |
| 609 | gt_ans = float(gt_ans) |
| 610 | elif data_name == "carp_en": |
| 611 | gt_cot, gt_ans = example["steps"], example["answer"] |
| 612 | elif data_name == "mmlu_stem": |
| 613 | abcd = "ABCD" |
| 614 | gt_cot, gt_ans = None, abcd[example["answer"]] |
| 615 | elif data_name == "sat_math": |
| 616 | gt_cot, gt_ans = None, example["Answer"] |
| 617 | elif data_name == "aqua": |
| 618 | gt_cot, gt_ans = None, example["correct"] |
| 619 | elif data_name in ["gaokao2023en", "college_math", "gaokao_math_cloze"]: |
| 620 | gt_cot, gt_ans = None, example["answer"].replace("$", "").strip() |
| 621 | elif data_name == "gaokao_math_qa": |
| 622 | gt_cot, gt_ans = None, example["label"] |
| 623 | elif data_name in ["gaokao2024_mix", "cn_middle_school"]: |
| 624 | if len(example["choice_answer"]) > 0: |
| 625 | gt_cot, gt_ans = None, example["choice_answer"] |
| 626 | else: |
| 627 | gt_cot, gt_ans = None, example["answer"] |
| 628 | elif data_name == "olympiadbench": |
| 629 | gt_cot, gt_ans = None, example["final_answer"][0].strip("$") |
| 630 | elif data_name in [ |
| 631 | "aime24", |
| 632 | "amc23", |
no test coverage detected