MCPcopy
hub / github.com/ray-project/ray / ImageDatasource

Class ImageDatasource

python/ray/data/_internal/datasource/image_datasource.py:30–155  ·  view source on GitHub ↗

A datasource that lets you read images.

Source from the content-addressed store, hash-verified

28
29
30class ImageDatasource(FileBasedDatasource):
31 """A datasource that lets you read images."""
32
33 _WRITE_FILE_PER_ROW = True
34 _FILE_EXTENSIONS = ["png", "jpg", "jpeg", "tif", "tiff", "bmp", "gif"]
35 # Use 8 threads per task to read image files.
36 _NUM_THREADS_PER_TASK = 8
37
def __init__(
    self,
    paths: Union[str, List[str]],
    size: Optional[Tuple[int, int]] = None,
    mode: Optional[str] = None,
    **file_based_datasource_kwargs,
):
    """Set up the datasource and validate the image options.

    Args:
        paths: A single path or a list of paths; forwarded unchanged to
            the parent constructor.
        size: Optional ``(height, width)`` that images are resized to
            when they are read. Both values must be strictly positive.
        mode: Optional image mode; stored on ``self.mode`` and compared
            against the loaded image's mode when reading.
        **file_based_datasource_kwargs: Extra keyword arguments forwarded
            to the parent constructor. A ``meta_provider`` entry, if
            present, is also inspected here.

    Raises:
        ValueError: If ``size`` does not contain exactly two entries, or
            if either entry is not strictly positive.
    """
    super().__init__(paths, **file_based_datasource_kwargs)

    # Check that PIL (the Pillow package) is importable
    # (presumably raises if it is missing -- confirm against the helper).
    _check_import(self, module="PIL", package="Pillow")

    if size is not None:
        if len(size) != 2:
            raise ValueError(
                "Expected `size` to contain two integers for height and width, "
                f"but got {len(size)} integers instead."
            )

        # Zero is rejected as well as negatives: the message promises
        # positive integers, and an image cannot be resized to an empty
        # dimension.
        if size[0] <= 0 or size[1] <= 0:
            raise ValueError(
                f"Expected `size` to contain positive integers, but got {size} instead."
            )

    self.size = size
    self.mode = mode

    meta_provider = file_based_datasource_kwargs.get("meta_provider")
    if isinstance(meta_provider, ImageFileMetadataProvider):
        # Estimate the ratio from the files and hand the same value to the
        # image metadata provider.
        self._encoding_ratio = self._estimate_files_encoding_ratio()
        meta_provider._set_encoding_ratio(self._encoding_ratio)
    else:
        self._encoding_ratio = IMAGE_ENCODING_RATIO_ESTIMATE_DEFAULT
69
70 def _read_stream(
71 self,
72 f: "pyarrow.NativeFile",
73 path: str,
74 ) -> Iterator[Block]:
75 from PIL import Image, UnidentifiedImageError
76
77 data = f.readall()
78
79 try:
80 image = Image.open(io.BytesIO(data))
81 except UnidentifiedImageError as e:
82 raise ValueError(f"PIL couldn't load image file at path '{path}'.") from e
83
84 if self.size is not None and image.size != tuple(reversed(self.size)):
85 height, width = self.size
86 image = image.resize((width, height), resample=Image.BILINEAR)
87 if self.mode is not None and image.mode != self.mode:

Callers 2

read_images · Function · 0.90

Calls

no outgoing calls

Tested by 1

Used in the wild — real call sites across dependent graphs

searching dependent graphs…