MCPcopy
hub / github.com/huggingface/datasets / test_image_webdataset

Function test_image_webdataset

tests/packaged_modules/test_webdataset.py:142–169  ·  view source on GitHub ↗
(image_wds_file)

Source from the content-addressed store, hash-verified

140
@require_pil
def test_image_webdataset(image_wds_file):
    """Check schema inference and example generation for an image WebDataset.

    The builder is pointed at ``image_wds_file`` as its only "train" data
    file. The test then verifies that:

    * the inferred features are ``__key__``, ``__url__``, a ``json`` dict
      holding a string ``caption``, and a ``jpg`` ``Image`` column;
    * exactly one split generator named "train" is produced;
    * three examples are generated, and the first one exposes ``json`` as a
      dict and ``jpg`` as a dict before decoding;
    * an encode/decode round trip through the features turns ``jpg`` into a
      ``PIL.Image.Image``.

    Args:
        image_wds_file: Data file handed to the builder (presumably a pytest
            fixture providing an archive path -- confirm against conftest).
    """
    import PIL.Image

    builder = WebDataset(data_files={"train": [image_wds_file]})
    splits = builder._split_generators(DownloadManager())

    # Schema the builder is expected to have inferred from the archive.
    expected_features = Features(
        {
            "__key__": Value("string"),
            "__url__": Value("string"),
            "json": {"caption": Value("string")},
            "jpg": Image(),
        }
    )
    assert builder.info.features == expected_features

    assert len(splits) == 1
    train_split = splits[0]
    assert train_split.name == "train"

    # The generator yields pairs; only the second element (the example) is
    # inspected here.
    _, examples = zip(*builder._generate_examples(**train_split.gen_kwargs))
    assert len(examples) == 3

    first = examples[0]
    assert isinstance(first["json"], dict)
    assert isinstance(first["json"]["caption"], str)
    # Images are kept encoded at this stage to avoid unnecessary copies.
    assert isinstance(first["jpg"], dict)

    # Round trip through the features: decoding yields a PIL image.
    features = builder.info.features
    decoded = features.decode_example(features.encode_example(first))
    assert isinstance(decoded["json"], dict)
    assert isinstance(decoded["json"]["caption"], str)
    assert isinstance(decoded["jpg"], PIL.Image.Image)
170
171
172def test_upper_lower_case(upper_lower_case_file):

Callers

nothing calls this directly

Calls 9

_split_generators · Method · 0.95
_generate_examples · Method · 0.95
WebDataset · Class · 0.90
DownloadManager · Class · 0.90
Features · Class · 0.90
Value · Class · 0.90
Image · Class · 0.90
encode_example · Method · 0.45
decode_example · Method · 0.45

Tested by

no test coverage detected