| 145 | # self.zip.close() |
| 146 | |
| 147 | class StreamingRarDataset(IterableDataset): |
def __init__(self, path, transform=None, default_size=256):
    """Open the RAR archive at ``path`` and build the image decode callback.

    Args:
        path: Location of the RAR archive; passed unchanged to
            ``rarfile.RarFile``.
        transform: Optional callable applied to every decoded RGB image.
            When ``None`` the PIL image is returned as-is.
        default_size: Side length of the all-zero ``(3, size, size)``
            tensor returned in place of an entry PIL cannot identify.
    """
    from PIL import ImageFile

    # This sets an attribute on the PIL ``ImageFile`` module itself, so it
    # applies to every image loaded in this process, not only to this
    # dataset.
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    print("begin open rar")
    self.rar = rarfile.RarFile(path)
    print("finish open rar")
    self.transform = transform

    def callback_fn(file_buffer, filename):
        """Decode one archive member into an ``(image, name)`` pair.

        ``file_buffer.get_bytes()`` is handed directly to ``Image.open``.
        ``name`` is the member's base name cut at its first dot, so
        ``"dir/a.b.png"`` yields ``"a"``.

        When PIL raises ``UnidentifiedImageError`` a zero tensor and the
        sentinel name ``"not_a_image"`` are returned instead of raising,
        so a single bad entry does not abort iteration.
        """
        try:
            img = Image.open(file_buffer.get_bytes()).convert('RGB')
            # Drop the directory part, then everything from the first dot.
            filename = os.path.basename(filename).split('.')[0]
            if self.transform is not None:
                img = self.transform(img)
            return img, filename
        except PIL.UnidentifiedImageError:
            print("UnidentifiedImageError")
            return torch.zeros((3, default_size, default_size)), "not_a_image"

    # No extraction handle yet: ``__next__`` opens one when it finds this
    # attribute still set to None.
    self.handle = None
    self.callback_fn = callback_fn
| 170 | |
| 171 | def __len__(self): |
| 172 | return len(self.rar.filelist) |
| 173 | def __next__(self): |
| 174 | if self.pointer >= len(self.members): |
| 175 | raise StopIteration() |
| 176 | if self.handle == None: |
| 177 | archive = unrarlib.RAROpenArchiveDataEx( |
| 178 | self.rar.filename, mode=constants.RAR_OM_EXTRACT) |
| 179 | self.handle = self.rar._open(archive) |
| 180 | # callback to memory |
| 181 | self.data_storage = _ReadIntoMemory() |
| 182 | c_callback = unrarlib.UNRARCALLBACK(self.data_storage._callback) |
| 183 | unrarlib.RARSetCallback(self.handle, c_callback, 0) |
| 184 | handle = self.handle |
| 185 | try: |
| 186 | rarinfo = self.rar._read_header(handle) |
| 187 | while rarinfo is not None: |
| 188 | if rarinfo.filename == self.members[self.pointer]: |
| 189 | self.rar._process_current(handle, constants.RAR_TEST) |
| 190 | break |
| 191 | else: |
| 192 | self.rar._process_current(handle, constants.RAR_SKIP) |
| 193 | rarinfo = self.rar._read_header(handle) |
| 194 | |
| 195 | if rarinfo is None: |
| 196 | self.data_storage = None |
| 197 | |
| 198 | except unrarlib.UnrarException: |
| 199 | raise BadRarFile("Bad RAR archive data.") |
| 200 | |
| 201 | if self.data_storage is None: |
| 202 | raise KeyError('There is no item named %r in the archive' % self.members[self.pointer]) |
| 203 | |
| 204 | # return file-like object |