MCPcopy
hub / github.com/huggingface/datasets / test_builder_as_dataset

Function test_builder_as_dataset

tests/test_builder.py:536–567  ·  view source on GitHub ↗
(split, expected_dataset_class, expected_dataset_length, in_memory, tmp_path)

Source from the content-addressed store, hash-verified

534)
535@pytest.mark.parametrize("in_memory", [False, True])
def test_builder_as_dataset(split, expected_dataset_class, expected_dataset_length, in_memory, tmp_path):
    """Check that ``builder.as_dataset`` loads Arrow files already present in the cache dir.

    A "train" and a "test" split of 10 rows each (single ``text`` string column)
    are written directly into the builder's cache directory, then loaded back
    through ``as_dataset`` and validated.

    Args:
        split: Value forwarded to ``as_dataset(split=...)``.
        expected_dataset_class: Type the returned object must be an instance of.
        expected_dataset_length: Number of rows expected in every returned dataset.
        in_memory: Forwarded to ``as_dataset``; also selects which Arrow
            memory assertion wraps the call.
        tmp_path: pytest temporary directory used as the cache root.
    """
    cache_dir = str(tmp_path)
    builder = DummyBuilder(cache_dir=cache_dir)
    os.makedirs(builder.cache_dir)

    # Declare the splits on the builder info so as_dataset knows what to load.
    builder.info.splits = SplitDict()
    builder.info.splits.add(SplitInfo("train", num_examples=10))
    builder.info.splits.add(SplitInfo("test", num_examples=10))

    # Write one Arrow file per declared split straight into the cache directory.
    for info_split in builder.info.splits:
        with ArrowWriter(
            path=os.path.join(builder.cache_dir, f"{builder.dataset_name}-{info_split}.arrow"),
            features=Features({"text": Value("string")}),
        ) as writer:
            writer.write_batch({"text": ["foo"] * 10})
            writer.finalize()

    # Arrow memory is expected to grow only when the data is loaded in memory.
    with assert_arrow_memory_increases() if in_memory else assert_arrow_memory_doesnt_increase():
        dataset = builder.as_dataset(split=split, in_memory=in_memory)
    assert isinstance(dataset, expected_dataset_class)

    if isinstance(dataset, DatasetDict):
        assert list(dataset.keys()) == ["train", "test"]
        datasets = dataset.values()
        expected_splits = ["train", "test"]
    elif isinstance(dataset, Dataset):
        datasets = [dataset]
        expected_splits = [split]

    expected_features = Features({"text": Value("string")})
    # Use a distinct loop name so the object under test is not rebound.
    for loaded_dataset, expected_split in zip(datasets, expected_splits):
        assert loaded_dataset.split == expected_split
        assert len(loaded_dataset) == expected_dataset_length
        assert loaded_dataset.features == expected_features
        # Previously a bare comparison with no `assert`, so it never checked anything.
        assert loaded_dataset.column_names == ["text"]
568
569
570@pytest.mark.parametrize("in_memory", [False, True])

Callers

nothing calls this directly

Calls 13

SplitDictClass · 0.90
SplitInfoClass · 0.90
ArrowWriterClass · 0.90
FeaturesClass · 0.90
ValueClass · 0.90
makedirsMethod · 0.80
write_batchMethod · 0.80
finalizeMethod · 0.80
as_datasetMethod · 0.80
DummyBuilderClass · 0.70

Tested by

no test coverage detected