Creates a :class:`~ray.data.Dataset` from image files.

The column name defaults to "image".

Examples:
    >>> import ray
    >>> path = "s3://anonymous@ray-example-data/batoidea/JPEGImages/"
    >>> ds = ray.data.read_images(path)
    >>> ds.schema()
    Column Type
(
paths: Union[str, List[str]],
*,
filesystem: Optional["pyarrow.fs.FileSystem"] = None,
parallelism: int = -1,
num_cpus: Optional[float] = None,
num_gpus: Optional[float] = None,
memory: Optional[float] = None,
ray_remote_args: Dict[str, Any] = None,
arrow_open_file_args: Optional[Dict[str, Any]] = None,
partition_filter: Optional[PathPartitionFilter] = None,
partitioning: Partitioning = None,
size: Optional[Tuple[int, int]] = None,
mode: Optional[str] = None,
include_paths: bool = False,
ignore_missing_paths: bool = False,
shuffle: Optional[Union[Literal["files"], FileShuffleConfig]] = None,
file_extensions: Optional[List[str]] = ImageDatasource._FILE_EXTENSIONS,
concurrency: Optional[int] = None,
override_num_blocks: Optional[int] = None,
)
| 1451 | |
| 1452 | @PublicAPI(stability="beta") |
| 1453 | def read_images( |
| 1454 | paths: Union[str, List[str]], |
| 1455 | *, |
| 1456 | filesystem: Optional["pyarrow.fs.FileSystem"] = None, |
| 1457 | parallelism: int = -1, |
| 1458 | num_cpus: Optional[float] = None, |
| 1459 | num_gpus: Optional[float] = None, |
| 1460 | memory: Optional[float] = None, |
| 1461 | ray_remote_args: Dict[str, Any] = None, |
| 1462 | arrow_open_file_args: Optional[Dict[str, Any]] = None, |
| 1463 | partition_filter: Optional[PathPartitionFilter] = None, |
| 1464 | partitioning: Partitioning = None, |
| 1465 | size: Optional[Tuple[int, int]] = None, |
| 1466 | mode: Optional[str] = None, |
| 1467 | include_paths: bool = False, |
| 1468 | ignore_missing_paths: bool = False, |
| 1469 | shuffle: Optional[Union[Literal["files"], FileShuffleConfig]] = None, |
| 1470 | file_extensions: Optional[List[str]] = ImageDatasource._FILE_EXTENSIONS, |
| 1471 | concurrency: Optional[int] = None, |
| 1472 | override_num_blocks: Optional[int] = None, |
| 1473 | ) -> Dataset: |
| 1474 | """Creates a :class:`~ray.data.Dataset` from image files. |
| 1475 | |
| 1476 | The column name defaults to "image". |
| 1477 | |
| 1478 | Examples: |
| 1479 | >>> import ray |
| 1480 | >>> path = "s3://anonymous@ray-example-data/batoidea/JPEGImages/" |
| 1481 | >>> ds = ray.data.read_images(path) |
| 1482 | >>> ds.schema() |
| 1483 | Column Type |
| 1484 | ------ ---- |
| 1485 | image ArrowTensorTypeV2(shape=(32, 32, 3), dtype=uint8) |
| 1486 | |
| 1487 | If you need image file paths, set ``include_paths=True``. |
| 1488 | |
| 1489 | >>> ds = ray.data.read_images(path, include_paths=True) |
| 1490 | >>> ds.schema() |
| 1491 | Column Type |
| 1492 | ------ ---- |
| 1493 | image ArrowTensorTypeV2(shape=(32, 32, 3), dtype=uint8) |
| 1494 | path string |
| 1495 | >>> ds.take(1)[0]["path"] |
| 1496 | 'ray-example-data/batoidea/JPEGImages/1.jpeg' |
| 1497 | |
| 1498 | If your images are arranged like: |
| 1499 | |
| 1500 | .. code:: |
| 1501 | |
| 1502 | root/dog/xxx.png |
| 1503 | root/dog/xxy.png |
| 1504 | |
| 1505 | root/cat/123.png |
| 1506 | root/cat/nsdf3.png |
| 1507 | |
| 1508 | Then you can include the labels by specifying a |
| 1509 | :class:`~ray.data.datasource.partitioning.Partitioning`. |
| 1510 |
no test coverage detected
searching dependent graphs…