A datasource that lets you read images.
| 28 | |
| 29 | |
| 30 | class ImageDatasource(FileBasedDatasource): |
| 31 | """A datasource that lets you read images.""" |
| 32 | |
| 33 | _WRITE_FILE_PER_ROW = True |
| 34 | _FILE_EXTENSIONS = ["png", "jpg", "jpeg", "tif", "tiff", "bmp", "gif"] |
| 35 | # Use 8 threads per task to read image files. |
| 36 | _NUM_THREADS_PER_TASK = 8 |
| 37 | |
def __init__(
    self,
    paths: Union[str, List[str]],
    size: Optional[Tuple[int, int]] = None,
    mode: Optional[str] = None,
    **file_based_datasource_kwargs,
):
    """Configure an image datasource over ``paths``.

    Args:
        paths: A single path or a list of paths; forwarded to the parent
            constructor.
        size: Optional ``(height, width)`` pair. When set, images whose
            dimensions differ are resized to it on read. ``None`` disables
            resizing.
        mode: Optional image mode, stored as ``self.mode`` and compared
            against each image's mode on read. ``None`` leaves it unset.
        **file_based_datasource_kwargs: Extra keyword arguments forwarded
            unchanged to the parent constructor. A ``meta_provider`` entry,
            if present, is also inspected here (see below).

    Raises:
        ValueError: If ``size`` does not contain exactly two elements, or
            if either element is not strictly positive.
    """
    super().__init__(paths, **file_based_datasource_kwargs)

    # NOTE(review): presumably verifies that Pillow is importable and
    # reports a helpful error otherwise -- confirm against `_check_import`.
    _check_import(self, module="PIL", package="Pillow")

    if size is not None:
        if len(size) != 2:
            raise ValueError(
                "Expected `size` to contain two integers for height and width, "
                f"but got {len(size)} integers instead."
            )

        # Reject zero as well as negatives: the message promises *positive*
        # integers, and a zero-sized resize target is not usable when the
        # files are eventually read, so fail fast here instead.
        if size[0] <= 0 or size[1] <= 0:
            raise ValueError(
                f"Expected `size` to contain positive integers, but got {size} instead."
            )

    self.size = size
    self.mode = mode

    # Only an ImageFileMetadataProvider gets a ratio estimated from the
    # files; any other (or missing) provider falls back to the default.
    meta_provider = file_based_datasource_kwargs.get("meta_provider")
    if isinstance(meta_provider, ImageFileMetadataProvider):
        self._encoding_ratio = self._estimate_files_encoding_ratio()
        meta_provider._set_encoding_ratio(self._encoding_ratio)
    else:
        self._encoding_ratio = IMAGE_ENCODING_RATIO_ESTIMATE_DEFAULT
| 69 | |
| 70 | def _read_stream( |
| 71 | self, |
| 72 | f: "pyarrow.NativeFile", |
| 73 | path: str, |
| 74 | ) -> Iterator[Block]: |
| 75 | from PIL import Image, UnidentifiedImageError |
| 76 | |
| 77 | data = f.readall() |
| 78 | |
| 79 | try: |
| 80 | image = Image.open(io.BytesIO(data)) |
| 81 | except UnidentifiedImageError as e: |
| 82 | raise ValueError(f"PIL couldn't load image file at path '{path}'.") from e |
| 83 | |
| 84 | if self.size is not None and image.size != tuple(reversed(self.size)): |
| 85 | height, width = self.size |
| 86 | image = image.resize((width, height), resample=Image.BILINEAR) |
| 87 | if self.mode is not None and image.mode != self.mode: |
no outgoing calls
searching dependent graphs…