| 32 | |
| 33 | |
| 34 | class MMLU(Evaluator): |
| 35 | DATASET_URL = "https://people.eecs.berkeley.edu/~hendrycks/data.tar" |
| 36 | |
| 37 | CHOICES = ["A", "B", "C", "D"] |
| 38 | SUBJECT_TO_SUBCATEGORIES = { |
| 39 | "abstract_algebra": ["math"], |
| 40 | "anatomy": ["health"], |
| 41 | "astronomy": ["physics"], |
| 42 | "business_ethics": ["business"], |
| 43 | "clinical_knowledge": ["health"], |
| 44 | "college_biology": ["biology"], |
| 45 | "college_chemistry": ["chemistry"], |
| 46 | "college_computer_science": ["computer science"], |
| 47 | "college_mathematics": ["math"], |
| 48 | "college_medicine": ["health"], |
| 49 | "college_physics": ["physics"], |
| 50 | "computer_security": ["computer science"], |
| 51 | "conceptual_physics": ["physics"], |
| 52 | "econometrics": ["economics"], |
| 53 | "electrical_engineering": ["engineering"], |
| 54 | "elementary_mathematics": ["math"], |
| 55 | "formal_logic": ["philosophy"], |
| 56 | "global_facts": ["other"], |
| 57 | "high_school_biology": ["biology"], |
| 58 | "high_school_chemistry": ["chemistry"], |
| 59 | "high_school_computer_science": ["computer science"], |
| 60 | "high_school_european_history": ["history"], |
| 61 | "high_school_geography": ["geography"], |
| 62 | "high_school_government_and_politics": ["politics"], |
| 63 | "high_school_macroeconomics": ["economics"], |
| 64 | "high_school_mathematics": ["math"], |
| 65 | "high_school_microeconomics": ["economics"], |
| 66 | "high_school_physics": ["physics"], |
| 67 | "high_school_psychology": ["psychology"], |
| 68 | "high_school_statistics": ["math"], |
| 69 | "high_school_us_history": ["history"], |
| 70 | "high_school_world_history": ["history"], |
| 71 | "human_aging": ["health"], |
| 72 | "human_sexuality": ["culture"], |
| 73 | "international_law": ["law"], |
| 74 | "jurisprudence": ["law"], |
| 75 | "logical_fallacies": ["philosophy"], |
| 76 | "machine_learning": ["computer science"], |
| 77 | "management": ["business"], |
| 78 | "marketing": ["business"], |
| 79 | "medical_genetics": ["health"], |
| 80 | "miscellaneous": ["other"], |
| 81 | "moral_disputes": ["philosophy"], |
| 82 | "moral_scenarios": ["philosophy"], |
| 83 | "nutrition": ["health"], |
| 84 | "philosophy": ["philosophy"], |
| 85 | "prehistory": ["history"], |
| 86 | "professional_accounting": ["other"], |
| 87 | "professional_law": ["law"], |
| 88 | "professional_medicine": ["health"], |
| 89 | "professional_psychology": ["psychology"], |
| 90 | "public_relations": ["politics"], |
| 91 | "security_studies": ["politics"], |