Load the OptiLLM Bench dataset.
()
| 77 | ] |
| 78 | |
def load_optillm_bench() -> datasets.Dataset:
    """Load the OptiLLM Bench dataset and return its ``test`` split.

    The dataset is requested from ``load_dataset`` under the identifier
    ``codelion/optillmbench``; only the ``test`` split is handed back,
    because that is the split used for evaluation.

    Returns:
        The ``test`` split of the loaded dataset.

    Raises:
        Exception: Whatever is raised while loading the dataset or
            selecting the split is logged via ``logger.error`` and then
            re-raised unchanged.
    """
    try:
        all_splits = load_dataset("codelion/optillmbench")
        # Evaluation runs exclusively on the test split.
        evaluation_split = all_splits["test"]
    except Exception as e:
        # Record the failure for diagnostics, then let the caller handle it.
        logger.error(f"Error loading dataset: {e}")
        raise
    else:
        return evaluation_split
| 87 | |
| 88 | def extract_gsm8k_answer(text: str) -> float: |
| 89 | """Extract numerical answer after ### from GSM8K responses.""" |