(dataset_path, features, num_examples=100, seq_shapes=None)
| 45 | |
| 46 | |
def generate_example_dataset(dataset_path, features, num_examples=100, seq_shapes=None):
    """Write generated dummy examples to an Arrow file and load it as a dataset.

    Examples come from ``generate_examples``; each one is encoded with
    ``features.encode_example`` and written to ``dataset_path`` through an
    ``ArrowWriter``. The file is then opened with ``datasets.Dataset.from_file``.

    Args:
        dataset_path: Destination path handed to ``ArrowWriter`` and later to
            ``datasets.Dataset.from_file``.
        features: Features object used to encode each record, configure the
            writer, and build the ``DatasetInfo`` of the returned dataset.
        num_examples: Number of examples requested from ``generate_examples``
            and expected to be reported by the writer.
        seq_shapes: Forwarded unchanged to ``generate_examples``.

    Returns:
        The object returned by ``datasets.Dataset.from_file`` for the written file.

    Raises:
        ValueError: If the count reported by ``writer.finalize()`` differs
            from ``num_examples``.
    """
    generated = generate_examples(features, num_examples=num_examples, seq_shapes=seq_shapes)

    with ArrowWriter(features=features, path=dataset_path) as arrow_writer:
        # Each generated item is a (key, record) pair; only the record is written.
        for _, raw_record in generated:
            arrow_writer.write(features.encode_example(raw_record))

        # NOTE(review): the original indentation was lost; finalize() is assumed
        # to run inside the `with` block, before the writer is exited - confirm.
        # finalize() returns (example count, byte count); the byte count is unused.
        num_final_examples, _ = arrow_writer.finalize()

    # Guard against a partially written file before it is loaded back.
    if num_final_examples != num_examples:
        raise ValueError(
            f"Error writing the dataset, wrote {num_final_examples} examples but should have written {num_examples}."
        )

    dataset_info = datasets.DatasetInfo(features=features)
    return datasets.Dataset.from_file(filename=dataset_path, info=dataset_info)
no test coverage detected