MCPcopy
hub / github.com/huggingface/datasets / DummyBuilder

Class DummyBuilder

tests/test_builder.py:45–58  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

43
44
class DummyBuilder(DatasetBuilder):
    """Minimal builder for tests: a single TRAIN split of 100 "foo" rows."""

    def _info(self):
        """Return dataset info declaring one string column named "text"."""
        schema = Features({"text": Value("string")})
        return DatasetInfo(features=schema)

    def _split_generators(self, dl_manager):
        """Return a one-element list with the TRAIN split generator.

        ``dl_manager`` is accepted but not used.
        """
        train_split = SplitGenerator(name=Split.TRAIN)
        return [train_split]

    def _prepare_split(self, split_generator, **kwargs):
        """Write the split's Arrow file and record its size on the split info.

        The file is named ``<dataset_name>-<split name>.arrow`` and placed
        under ``self._output_dir``. Extra keyword arguments are ignored.
        """
        arrow_name = f"{self.dataset_name}-{split_generator.name}.arrow"
        with ArrowWriter(
            features=self.info.features,
            path=os.path.join(self._output_dir, arrow_name),
        ) as arrow_writer:
            batch = {"text": ["foo"] * 100}
            arrow_writer.write_batch(batch)
            # finalize() is called inside the context so the counts are
            # captured before the writer is closed on exit.
            example_count, byte_count = arrow_writer.finalize()
        split_generator.split_info.num_examples = example_count
        split_generator.split_info.num_bytes = byte_count
59
60
61class DummyGeneratorBasedBuilder(GeneratorBasedBuilder):

Calls

no outgoing calls