Dataset schema. Attributes: base_schema: The underlying Arrow or Pandas schema.
| 7660 | |
| 7661 | @PublicAPI(stability="beta") |
| 7662 | class Schema: |
| 7663 | """Dataset schema. |
| 7664 | |
| 7665 | Attributes: |
| 7666 | base_schema: The underlying Arrow or Pandas schema. |
| 7667 | """ |
| 7668 | |
def __init__(
    self,
    base_schema: Union["pyarrow.lib.Schema", "PandasBlockSchema"],
    *,
    data_context: Optional[DataContext] = None,
):
    """Wrap an Arrow or Pandas schema in a :class:`Schema`.

    Args:
        base_schema: The underlying Arrow or Pandas schema.
        data_context: The data context to use for this schema. When it is
            omitted (or otherwise falsy), a deep copy of the current
            ``DataContext`` is stored instead.
    """
    self.base_schema = base_schema

    # No usable context supplied: take a deep-copied snapshot of the current
    # one, so the config of Datasets is determined by the config in effect at
    # the time this object was created.
    if not data_context:
        data_context = copy.deepcopy(DataContext.get_current())

    self._context = data_context
| 7687 | |
@property
def names(self) -> List[str]:
    """Return the user-visible column names of this Dataset."""
    from ray.data._internal.arrow_block import _is_user_visible_column

    # The read path injects a physical ``__bsp_stub`` placeholder into
    # zero-column blocks so that ``pa.concat_tables`` doesn't collapse the
    # row count. That placeholder is not part of the user-visible schema,
    # so only names accepted by ``_is_user_visible_column`` are kept.
    visible_columns = filter(_is_user_visible_column, self.base_schema.names)
    return list(visible_columns)
| 7699 | |
| 7700 | @property |
| 7701 | def types(self) -> List[Union[type[object], "pyarrow.lib.DataType"]]: |
| 7702 | """Lists the types of this Dataset in Arrow format |
| 7703 | |
| 7704 | For non-Arrow compatible types, we return "object". |
| 7705 | """ |
| 7706 | import pandas as pd |
| 7707 | import pyarrow as pa |
| 7708 | from pandas.core.dtypes.dtypes import BaseMaskedDtype |
| 7709 | |
| 7710 | from ray.data._internal.arrow_block import _is_user_visible_column |
| 7711 | from ray.data._internal.tensor_extensions.arrow import ( |
| 7712 | create_arrow_fixed_shape_tensor_type, |
| 7713 | ) |
| 7714 | from ray.data.extensions import TensorDtype |
| 7715 | |
| 7716 | def _convert_to_pa_type( |
| 7717 | dtype: Union[np.dtype, pd.ArrowDtype, BaseMaskedDtype], |
| 7718 | ) -> pa.DataType: |
| 7719 | if isinstance(dtype, pd.ArrowDtype): |
no outgoing calls
searching dependent graphs…