Create a :class:`~ray.data.Dataset` from a Ray object reference, or a list of Ray object references, to PyArrow tables.

Examples:
    >>> import pyarrow as pa
    >>> import ray
    >>> table_ref = ray.put(pa.table({"x": [1]}))
    >>> ray.data.from_arrow_refs(table_ref)  # doctest: +ELLIPSIS
    shape: (1, 1)
    ...
(
tables: Union[
ObjectRef[Union["pyarrow.Table", bytes]],
List[ObjectRef[Union["pyarrow.Table", bytes]]],
],
)
| 3718 | |
@DeveloperAPI
def from_arrow_refs(
    tables: Union[
        ObjectRef[Union["pyarrow.Table", bytes]],
        List[ObjectRef[Union["pyarrow.Table", bytes]]],
    ],
) -> MaterializedDataset:
    """Build a :class:`~ray.data.Dataset` out of Ray object references that point
    to PyArrow tables.

    Examples:
        Create a Ray Dataset from a single PyArrow table reference

        >>> import pyarrow as pa
        >>> import ray
        >>> table_ref = ray.put(pa.table({"x": [1]}))
        >>> ray.data.from_arrow_refs(table_ref)  # doctest: +ELLIPSIS
        shape: (1, 1)
        ╭───────╮
        │ x     │
        │ ---   │
        │ int64 │
        ╞═══════╡
        │ 1     │
        ╰───────╯
        (Showing 1 of 1 rows)

        Create a Ray Dataset from a list of PyArrow table references

        >>> ray.data.from_arrow_refs([table_ref, table_ref])  # doctest: +ELLIPSIS
        shape: (2, 1)
        ╭───────╮
        │ x     │
        │ ---   │
        │ int64 │
        ╞═══════╡
        │ 1     │
        │ 1     │
        ╰───────╯
        (Showing 2 of 2 rows)


    Args:
        tables: Either one Ray object reference to an Arrow table or a list of
            such references; the referenced value may also be the table's
            streaming format in bytes.

    Returns:
        :class:`~ray.data.Dataset` holding data read from the tables.
    """
    # A lone reference is handled as a one-element list.
    table_refs = [tables] if isinstance(tables, ray.ObjectRef) else tables

    fetch_metadata_schema = cached_remote_fn(get_table_block_metadata_schema)
    selector = DataContext.get_current().execution_options.label_selector
    if selector:
        # Apply the configured label selector to the metadata tasks.
        fetch_metadata_schema = fetch_metadata_schema.options(label_selector=selector)

    # One remote call per table reference, resolved together.
    pending = [fetch_metadata_schema.remote(ref) for ref in table_refs]
    block_metadata_schemas = ray.get(pending)

    dataset_stats = DatasetStats(
        metadata={"FromArrow": block_metadata_schemas},
        parent=None,
    )
    # The dataset gets its own copy of the current context.
    ctx = DataContext.get_current().copy()
    source_op = FromArrow(table_refs, block_metadata_schemas)
    plan = LogicalPlan(source_op, ctx)
    return MaterializedDataset(plan, ctx, dataset_stats)
no test coverage detected
searching dependent graphs…