(data_name: str)
| 82 | |
| 83 | |
def load_and_process_dataset(data_name: str) -> list[dict]:
    """Load the cached JSONL file for ``data_name`` and return its records.

    The records are read from ``CACHE_DIR / f"{data_name}.jsonl"``. When that
    file does not exist, ``_prepare_dataset(data_name)`` is called before the
    file is opened (presumably it writes the cache file -- confirm against its
    definition).

    Args:
        data_name: Dataset key; must be present in ``DATASETS``.

    Returns:
        One parsed JSON value per non-blank line of the file, in file order.

    Raises:
        ValueError: If ``data_name`` is not a key of ``DATASETS``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if data_name not in DATASETS:
        raise ValueError(f"Unknown dataset '{data_name}'. Available: {list(DATASETS.keys())}")

    path = CACHE_DIR / f"{data_name}.jsonl"
    if not path.exists():
        _prepare_dataset(data_name)

    # JSON text is UTF-8; an explicit encoding keeps decoding independent of
    # the platform's locale default.
    with open(path, encoding="utf-8") as f:
        # Blank or whitespace-only lines (e.g. trailing newlines) carry no
        # record and would make json.loads raise, so they are skipped.
        return [json.loads(line) for line in f if line.strip()]
| 94 | |
| 95 | |
| 96 | def _limit_dataset(dataset: list[dict], max_samples: int | None) -> list[dict]: |
no test coverage detected