Construct a Dask DataFrame from a Python Dictionary. Parameters ---------- data : dict Of the form {field : array-like} or {field : dict}. npartitions : int The number of partitions of the index to create. Note that depending on the size and index of the dataframe, the output may have fewer partitions than requested.
(
data,
npartitions,
orient="columns",
dtype=None,
columns=None,
constructor=pd.DataFrame,
)
| 5033 | |
@dataframe_creation_dispatch.register_inplace("pandas")
def from_dict(
    data,
    npartitions,
    orient="columns",
    dtype=None,
    columns=None,
    constructor=pd.DataFrame,
):
    """
    Construct a Dask DataFrame from a Python Dictionary

    The dictionary is first turned into an in-memory frame by calling
    ``constructor.from_dict`` and the result is then handed to
    ``from_pandas`` together with ``npartitions``.

    Parameters
    ----------
    data : dict
        Of the form {field : array-like} or {field : dict}.
    npartitions : int
        The number of partitions of the index to create. Note that depending on
        the size and index of the dataframe, the output may have fewer
        partitions than requested.
    orient : {'columns', 'index', 'tight'}, default 'columns'
        The "orientation" of the data. If the keys of the passed dict
        should be the columns of the resulting DataFrame, pass 'columns'
        (default). Otherwise if the keys should be rows, pass 'index'.
        If 'tight', assume a dict with keys
        ['index', 'columns', 'data', 'index_names', 'column_names'].
    dtype : dtype, default None
        Data type to force, otherwise infer.
    columns : list, default None
        Column labels to use when ``orient='index'``. Raises a ValueError
        if used with ``orient='columns'`` or ``orient='tight'``.
    constructor : class, default pd.DataFrame
        Class whose ``from_dict`` method is called to build the in-memory
        frame. It must accept ``orient``, ``dtype`` and ``columns`` as
        keyword arguments.

    Returns
    -------
    The collection produced by ``from_pandas`` for the constructed frame.

    Raises
    ------
    NotImplementedError
        If any value of ``data`` is a Dask collection.

    Examples
    --------
    >>> import dask.dataframe as dd
    >>> ddf = dd.from_dict({"num1": [1, 2, 3, 4], "num2": [7, 8, 9, 10]}, npartitions=2)
    """

    # Dask collections cannot be used as values; collect the offending types
    # so they can be reported in the error message.
    collection_types = {type(v) for v in data.values() if is_dask_collection(v)}
    if collection_types:
        raise NotImplementedError(
            "from_dict doesn't currently support Dask collections as inputs. "
            f"Objects of type {collection_types} were given in the input dict."
        )

    # Pass the options by keyword so the call does not depend on the
    # positional parameter order of a user-supplied ``constructor``.
    return from_pandas(
        constructor.from_dict(data, orient=orient, dtype=dtype, columns=columns),
        npartitions,
    )
| 5085 | |
| 5086 | |
| 5087 | def from_dask_array(x, columns=None, index=None, meta=None) -> DataFrame: |
searching dependent graphs…