| 43 | |
| 44 | |
class DummyBuilder(DatasetBuilder):
    """Minimal builder exposing one string column and a single train split.

    The split is materialised by writing one batch of 100 ``"foo"`` rows
    to an ``.arrow`` file inside ``self._output_dir``.
    """

    def _info(self):
        """Return the dataset info: a single ``"text"`` column of strings."""
        schema = Features({"text": Value("string")})
        return DatasetInfo(features=schema)

    def _split_generators(self, dl_manager):
        """Return a one-element list holding the train split generator.

        ``dl_manager`` is accepted for interface compatibility and is unused.
        """
        train_generator = SplitGenerator(name=Split.TRAIN)
        return [train_generator]

    def _prepare_split(self, split_generator, **kwargs):
        """Write the split's rows to disk and record its size statistics.

        The output file is named ``<dataset_name>-<split name>.arrow``.
        After writing, the example and byte counts returned by
        ``writer.finalize()`` are stored on ``split_generator.split_info``.
        Extra keyword arguments are accepted and ignored.
        """
        arrow_name = f"{self.dataset_name}-{split_generator.name}.arrow"
        arrow_writer = ArrowWriter(
            features=self.info.features,
            path=os.path.join(self._output_dir, arrow_name),
        )
        with arrow_writer as writer:
            writer.write_batch({"text": ["foo"] * 100})
            # finalize() yields the (example count, byte count) pair.
            stats = writer.finalize()
        example_count, byte_count = stats
        split_stats = split_generator.split_info
        split_stats.num_examples = example_count
        split_stats.num_bytes = byte_count
| 59 | |
| 60 | |
| 61 | class DummyGeneratorBasedBuilder(GeneratorBasedBuilder): |
no outgoing calls