Read any sliceable array into a Dask DataFrame. Uses getitem syntax to pull slices out of the array. The array need not be a NumPy array, but it must support slicing syntax such as `x[50000:100000]` and either have 2 dimensions (`x.ndim == 2`) or have a record dtype, e.g. `x.dtype == [('name', 'O'), ('balance', 'i8')]`.
from_array(arr, chunksize=50_000, columns=None, meta=None)
| 4967 | |
| 4968 | |
def from_array(arr, chunksize=50_000, columns=None, meta=None):
    """Build a Dask DataFrame or Series from any sliceable array.

    Partitions are obtained by slicing ``arr`` with getitem syntax, so
    ``arr`` does not have to be a NumPy array. It must, however, support
    slices such as

        arr[50000:100000]

    and either be two-dimensional:

        arr.ndim == 2

    or carry a record dtype:

        arr.dtype == [('name', 'O'), ('balance', 'i8')]

    Parameters
    ----------
    arr : array_like
        The array to read. A ``dask.array.Array`` is handed off to
        ``from_dask_array``; ``chunksize`` is not forwarded in that case.
    chunksize : int, optional
        The number of rows per partition to use.
    columns : list or string, optional
        List of column names if DataFrame, single string if Series.
    meta : object, optional
        An optional ``meta`` object telling dask which concrete dataframe
        type to use for the partitions of the Dask dataframe. By default,
        pandas DataFrame is used.

    Returns
    -------
    dask.DataFrame or dask.Series
        A dask DataFrame/Series
    """
    # Imported lazily, inside the function, exactly as the original does.
    import dask.array as da

    # Dask arrays take a dedicated conversion path.
    if isinstance(arr, da.Array):
        return from_dask_array(arr, columns=columns, meta=meta)

    from dask.dataframe.dask_expr.io.io import FromArray

    frame_expr = FromArray(
        arr, chunksize=chunksize, original_columns=columns, meta=meta
    )

    # Arrays whose dtype kind is "O" or "U" are wrapped in an
    # ArrowStringConversion, but only when pyarrow strings are enabled.
    # The dtype is inspected only if that setting check passes.
    wants_arrow_strings = pyarrow_strings_enabled() and arr.dtype.kind in "OU"
    if wants_arrow_strings:
        frame_expr = expr.ArrowStringConversion(frame_expr)

    return new_collection(frame_expr)
| 5018 | |
| 5019 | |
| 5020 | def from_graph(layer, _meta, divisions, keys, name_prefix): |
searching dependent graphs…