| 7362 | self[name] = (dims, data) |
| 7363 | |
def _set_numpy_data_from_dataframe(
    self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple
) -> None:
    """Store each ``(name, values)`` pair from ``arrays`` as ``self[name]``.

    With a flat (non-``MultiIndex``) index the values are assigned as-is.
    With a ``pd.MultiIndex`` each column is scattered into a dense array
    with one axis per index level before being assigned.

    Parameters
    ----------
    idx : pd.Index
        Index of the source dataframe; a ``pd.MultiIndex`` triggers the
        scatter-into-dense-array path.
    arrays : list of (Hashable, np.ndarray)
        Pairs of variable name and column values.
    dims : tuple
        Dimension names paired with the data in every assignment.
    """
    if isinstance(idx, pd.MultiIndex):
        # NB: similar, more general logic now exists in
        # variable.unstack_once; the two could be combined at some point.

        # One output axis per level; the integer codes locate every row of
        # the dataframe inside that dense grid.
        level_sizes = tuple(level.size for level in idx.levels)
        codes = tuple(idx.codes)

        # The MultiIndex has already been verified to hold only unique
        # values, so there are missing values if and only if the output
        # arrays are larger than the index.
        has_gaps = math.prod(level_sizes) > idx.shape[0]

        for name, column in arrays:
            # NumPy indexing is much faster than DataFrame.reindex() for
            # filling in missing values:
            # https://stackoverflow.com/a/35049899/809705
            if has_gaps:
                promoted_dtype, fill_value = xrdtypes.maybe_promote(column.dtype)
                grid = np.full(level_sizes, fill_value, promoted_dtype)
            else:
                # Without missing values, keep the existing dtype rather
                # than promoting to support NA, e.g., integer columns stay
                # integers.
                # TODO: consider removing this special case, which doesn't
                # exist for sparse=True.
                grid = np.zeros(level_sizes, column.dtype)
            grid[codes] = column
            self[name] = (dims, grid)
    else:
        # Flat index: the columns already have the right layout.
        for name, column in arrays:
            self[name] = (dims, column)
| 7400 | |
| 7401 | @classmethod |
| 7402 | def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self: |