Create a :class:`~ray.data.Dataset` from a PyArrow table, a list of PyArrow tables, or their streaming format in bytes.

Examples:

    >>> import pyarrow as pa
    >>> import ray
    >>> table = pa.table({"x": [1]})
    >>> ray.data.from_arrow(table) # doctest: +ELLIPSIS
    shape: (1, 1)
    ╭───────╮
    │ x │
(
tables: Union["pyarrow.Table", bytes, List[Union["pyarrow.Table", bytes]]],
*,
override_num_blocks: Optional[int] = None,
)
| 3630 | |
| 3631 | @PublicAPI |
| 3632 | def from_arrow( |
| 3633 | tables: Union["pyarrow.Table", bytes, List[Union["pyarrow.Table", bytes]]], |
| 3634 | *, |
| 3635 | override_num_blocks: Optional[int] = None, |
| 3636 | ) -> MaterializedDataset: |
| 3637 | """Create a :class:`~ray.data.Dataset` from a list of PyArrow tables. |
| 3638 | |
| 3639 | Examples: |
| 3640 | >>> import pyarrow as pa |
| 3641 | >>> import ray |
| 3642 | >>> table = pa.table({"x": [1]}) |
| 3643 | >>> ray.data.from_arrow(table) # doctest: +ELLIPSIS |
| 3644 | shape: (1, 1) |
| 3645 | ╭───────╮ |
| 3646 | │ x │ |
| 3647 | │ --- │ |
| 3648 | │ int64 │ |
| 3649 | ╞═══════╡ |
| 3650 | │ 1 │ |
| 3651 | ╰───────╯ |
| 3652 | (Showing 1 of 1 rows) |
| 3653 | |
| 3654 | Create a Ray Dataset from a list of PyArrow tables. |
| 3655 | |
| 3656 | >>> ray.data.from_arrow([table, table]) # doctest: +ELLIPSIS |
| 3657 | shape: (2, 1) |
| 3658 | ╭───────╮ |
| 3659 | │ x │ |
| 3660 | │ --- │ |
| 3661 | │ int64 │ |
| 3662 | ╞═══════╡ |
| 3663 | │ 1 │ |
| 3664 | │ 1 │ |
| 3665 | ╰───────╯ |
| 3666 | (Showing 2 of 2 rows) |
| 3667 | |
| 3668 | |
| 3669 | Args: |
| 3670 | tables: A PyArrow table, or a list of PyArrow tables, |
| 3671 | or its streaming format in bytes. |
| 3672 | override_num_blocks: Override the number of output blocks from all read tasks. |
| 3673 | By default, the number of output blocks is dynamically decided based on |
| 3674 | input data size and available resources. You shouldn't manually set this |
| 3675 | value in most cases. |
| 3676 | |
| 3677 | Returns: |
| 3678 | :class:`~ray.data.Dataset` holding data from the PyArrow tables. |
| 3679 | """ |
| 3680 | import builtins |
| 3681 | |
| 3682 | import pyarrow as pa |
| 3683 | |
| 3684 | if isinstance(tables, (pa.Table, bytes)): |
| 3685 | tables = [tables] |
| 3686 | |
| 3687 | if override_num_blocks is not None: |
| 3688 | if override_num_blocks <= 0: |
| 3689 | raise ValueError("override_num_blocks must be > 0") |
no test coverage detected
searching dependent graphs…