(self, dataset)
| 550 | return ['GMAI-MMBench_VAL', 'GMAI-MMBench_TEST'] |
| 551 | |
def load_data(self, dataset):
    """Load the requested GMAI-MMBench split.

    ``GMAI-MMBench_VAL`` is read from ``<LMUDataRoot()>/GMAI-MMBench_VAL.tsv``.
    This branch performs no download itself. When ``file_size`` reports the
    file as larger than 1 GB, it is passed through ``LOCALIZE`` and the
    resulting ``*_local.tsv`` is loaded instead.

    ``GMAI-MMBench_TEST`` is assembled from parts 1 through 11. Each part is
    downloaded when its TSV is missing or its MD5 differs from the entry in
    ``self.DATASET_MD5`` (parts without an entry are not MD5-checked), then
    localized and loaded. The parts are concatenated with a fresh index.

    Args:
        dataset: Split name, ``'GMAI-MMBench_VAL'`` or ``'GMAI-MMBench_TEST'``.

    Returns:
        The result of ``load`` for the VAL split, or the ``pd.concat`` of all
        loaded parts for the TEST split.

    Raises:
        ValueError: If ``dataset`` is not one of the two supported names.
        KeyError: If a TEST part has no entry in ``self.DATASET_URL``.
    """

    def localized(tsv_path, refresh=False):
        # Only the extension is rewritten. A plain str.replace would also
        # touch any '.tsv' that appears in a directory name of the data root.
        local_path = osp.splitext(tsv_path)[0] + '_local.tsv'
        if refresh or not osp.exists(local_path) or os.environ.get('FORCE_LOCAL'):
            # Imported at the point of use, as the original code does.
            from ..tools import LOCALIZE
            LOCALIZE(tsv_path, local_path)
        return local_path

    if dataset == 'GMAI-MMBench_VAL':
        data_path = osp.join(LMUDataRoot(), f'{dataset}.tsv')
        if file_size(data_path, 'GB') > 1:
            data_path = localized(data_path)
        return load(data_path)

    if dataset == 'GMAI-MMBench_TEST':
        frames = []
        for part_num in range(1, 12):
            part_name = f'GMAI_mm_bench_TEST_part_{part_num}'
            url = self.DATASET_URL[part_name]
            expected_md5 = self.DATASET_MD5.get(part_name)
            tsv_path = osp.join(LMUDataRoot(), f'{part_name}.tsv')
            needs_download = not osp.exists(tsv_path) or bool(
                expected_md5 and md5(tsv_path) != expected_md5
            )
            if needs_download:
                download_file(url, filename=tsv_path)
            # A freshly downloaded part invalidates any localized copy built
            # from the previous (missing or corrupt) file, so rebuild it.
            local_path = localized(tsv_path, refresh=needs_download)
            # Load this part.
            frames.append(load(local_path))
        # Merge all parts into one table.
        return pd.concat(frames, ignore_index=True)

    raise ValueError(f"未知的数据集:{dataset}")
| 584 | |
| 585 | def report_acc_by_groups(self, df, group_column): |
| 586 | res = defaultdict(list) |
nothing calls this directly
no test coverage detected