MCPcopy
hub / github.com/ray-project/ray / Schema

Class Schema

python/ray/data/dataset.py:7662–7786  ·  view source on GitHub ↗

Dataset schema. Attributes: base_schema: The underlying Arrow or Pandas schema.

Source from the content-addressed store, hash-verified

7660
7661@PublicAPI(stability="beta")
7662class Schema:
7663 """Dataset schema.
7664
7665 Attributes:
7666 base_schema: The underlying Arrow or Pandas schema.
7667 """
7668
7669 def __init__(
7670 self,
7671 base_schema: Union["pyarrow.lib.Schema", "PandasBlockSchema"],
7672 *,
7673 data_context: Optional[DataContext] = None,
7674 ):
7675 """
7676 Initialize a :class:`Schema` wrapper around an Arrow or Pandas schema.
7677
7678 Args:
7679 base_schema: The underlying Arrow or Pandas schema.
7680 data_context: The data context to use for this schema.
7681 """
7682 self.base_schema = base_schema
7683
7684 # Snapshot the current context, so that the config of Datasets is always
7685 # determined by the config at the time it was created.
7686 self._context = data_context or copy.deepcopy(DataContext.get_current())
7687
7688 @property
7689 def names(self) -> List[str]:
7690 """Lists the columns of this Dataset."""
7691 from ray.data._internal.arrow_block import _is_user_visible_column
7692
7693 # ``__bsp_stub`` is a physical placeholder the read path injects
7694 # into zero-column blocks so ``pa.concat_tables`` doesn't collapse
7695 # the row count. It's not part of the user-visible schema.
7696 return [
7697 name for name in self.base_schema.names if _is_user_visible_column(name)
7698 ]
7699
7700 @property
7701 def types(self) -> List[Union[type[object], "pyarrow.lib.DataType"]]:
7702 """Lists the types of this Dataset in Arrow format
7703
7704 For non-Arrow compatible types, we return "object".
7705 """
7706 import pandas as pd
7707 import pyarrow as pa
7708 from pandas.core.dtypes.dtypes import BaseMaskedDtype
7709
7710 from ray.data._internal.arrow_block import _is_user_visible_column
7711 from ray.data._internal.tensor_extensions.arrow import (
7712 create_arrow_fixed_shape_tensor_type,
7713 )
7714 from ray.data.extensions import TensorDtype
7715
7716 def _convert_to_pa_type(
7717 dtype: Union[np.dtype, pd.ArrowDtype, BaseMaskedDtype],
7718 ) -> pa.DataType:
7719 if isinstance(dtype, pd.ArrowDtype):

Callers 15

test_tensors_basic · Function · 0.90
test_zip_arrow · Function · 0.90
test_lance_read_basic · Function · 0.90
test_parquet_read_basic · Function · 0.90
test_delta_read_basic · Function · 0.90
test_csv_read · Function · 0.90
test_json_read · Function · 0.90

Calls

no outgoing calls

Tested by 15

test_tensors_basic · Function · 0.72
test_zip_arrow · Function · 0.72
test_lance_read_basic · Function · 0.72
test_parquet_read_basic · Function · 0.72
test_delta_read_basic · Function · 0.72
test_csv_read · Function · 0.72
test_json_read · Function · 0.72

Used in the wild — real call sites across dependent graphs

searching dependent graphs…