(image_wds_file)
| 140 | |
@require_pil
def test_image_webdataset(image_wds_file):
    """Check schema inference and example generation for an image WebDataset shard.

    The builder is pointed at a single "train" shard. The test verifies the
    features reported after split generation, the number and raw shape of the
    generated examples, and that an encode/decode round trip yields a PIL image.
    """
    from PIL import Image as pil_image

    builder = WebDataset(data_files={"train": [image_wds_file]})
    # Split generation must run before the builder's features are inspected.
    splits = builder._split_generators(DownloadManager())
    features = builder.info.features

    expected_features = Features(
        {
            "__key__": Value("string"),
            "__url__": Value("string"),
            "json": {"caption": Value("string")},
            "jpg": Image(),
        }
    )
    assert features == expected_features

    # Exactly one split is expected, named after the data_files key.
    assert len(splits) == 1
    train_split = splits[0]
    assert train_split.name == "train"

    # _generate_examples yields (key, example) pairs; only the examples matter here.
    _keys, examples = zip(*builder._generate_examples(**train_split.gen_kwargs))
    assert len(examples) == 3

    first = examples[0]
    assert isinstance(first["json"], dict)
    assert isinstance(first["json"]["caption"], str)
    # The image is kept encoded (as a dict) to avoid unnecessary copies.
    assert isinstance(first["jpg"], dict)

    # After an encode/decode round trip the image must be a PIL image.
    round_tripped = features.decode_example(features.encode_example(first))
    assert isinstance(round_tripped["json"], dict)
    assert isinstance(round_tripped["json"]["caption"], str)
    assert isinstance(round_tripped["jpg"], pil_image.Image)
| 170 | |
| 171 | |
| 172 | def test_upper_lower_case(upper_lower_case_file): |
nothing calls this directly
no test coverage detected