Generate the dataset and save it to a JSONL file.
(num_samples: int, output_file: str)
| 76 | } |
| 77 | |
| 78 | async def generate_dataset(num_samples: int, output_file: str): |
| 79 | """Generate the dataset and save it to a JSONL file.""" |
| 80 | dataset = load_dataset("lmsys/arena-hard-auto-v0.1", split="train") |
| 81 | |
| 82 | with open(output_file, "w") as f: |
| 83 | for sample in tqdm(dataset.select(range( num_samples)), total=num_samples): |
| 84 | try: |
| 85 | result = await process_sample(sample) |
| 86 | f.write(json.dumps(result) + "\n") |
| 87 | except Exception as e: |
| 88 | print(f"Skip over this item due to error {str(e)}") |
| 89 | |
| 90 | def main(): |
| 91 | parser = argparse.ArgumentParser(description="Generate OptILM dataset") |
no test coverage detected