Create a Dask DataFrame from a Dask Array. Converts a 2d array into a DataFrame and a 1d array into a Series. Parameters ---------- x : da.Array columns : list or string list of column names if DataFrame, single string if Series index : dask.dataframe.Index, optional
(x, columns=None, index=None, meta=None)
| 100 | |
| 101 | |
| 102 | def from_dask_array(x, columns=None, index=None, meta=None): |
| 103 | """Create a Dask DataFrame from a Dask Array. |
| 104 | |
| 105 | Converts a 2d array into a DataFrame and a 1d array into a Series. |
| 106 | |
| 107 | Parameters |
| 108 | ---------- |
| 109 | x : da.Array |
| 110 | columns : list or string |
| 111 | list of column names if DataFrame, single string if Series |
| 112 | index : dask.dataframe.Index, optional |
| 113 | An optional *dask* Index to use for the output Series or DataFrame. |
| 114 | |
| 115 | The default output index depends on whether `x` has any unknown |
| 116 | chunks. If there are any unknown chunks, the output has ``None`` |
| 117 | for all the divisions (one per chunk). If all the chunks are known, |
| 118 | a default index with known divisions is created. |
| 119 | |
| 120 | Specifying `index` can be useful if you're conforming a Dask Array |
| 121 | to an existing dask Series or DataFrame, and you would like the |
| 122 | indices to match. |
| 123 | meta : object, optional |
| 124 | An optional `meta` parameter can be passed for dask |
| 125 | to specify the concrete dataframe type to be returned. |
| 126 | By default, pandas DataFrame is used. |
| 127 | |
| 128 | Examples |
| 129 | -------- |
| 130 | >>> import dask.array as da |
| 131 | >>> import dask.dataframe as dd |
| 132 | >>> x = da.ones((4, 2), chunks=(2, 2)) |
| 133 | >>> df = dd.io.from_dask_array(x, columns=['a', 'b']) |
| 134 | >>> df.compute() |
| 135 | a b |
| 136 | 0 1.0 1.0 |
| 137 | 1 1.0 1.0 |
| 138 | 2 1.0 1.0 |
| 139 | 3 1.0 1.0 |
| 140 | |
| 141 | See Also |
| 142 | -------- |
| 143 | dask.bag.to_dataframe: from dask.bag |
| 144 | """ |
| 145 | meta = _meta_from_array(x, columns, index, meta=meta) |
| 146 | |
| 147 | name = "from-dask-array-" + tokenize(x, columns) |
| 148 | graph_dependencies = [x] |
| 149 | arrays_and_indices = [x.name, "ij" if x.ndim == 2 else "i"] |
| 150 | numblocks = {x.name: x.numblocks} |
| 151 | |
| 152 | if index is not None: |
| 153 | # An index is explicitly given by the caller, so we can pass it through to the |
| 154 | # initializer after a few checks. |
| 155 | if index.npartitions != x.numblocks[0]: |
| 156 | msg = ( |
| 157 | "The index and array have different numbers of blocks. " |
| 158 | f"({index.npartitions} != {x.numblocks[0]})" |
| 159 | ) |
no test coverage detected
searching dependent graphs…