hub / github.com/pydata/xarray / _set_numpy_data_from_dataframe

Method _set_numpy_data_from_dataframe

xarray/core/dataset.py:7364–7399 · view source on GitHub ↗

(
        self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple
    )

Source from the content-addressed store, hash-verified

7362	self[name] = (dims, data)
7363
7364	def _set_numpy_data_from_dataframe(
7365	self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple
7366	) -> None:
7367	if not isinstance(idx, pd.MultiIndex):
7368	for name, values in arrays:
7369	self[name] = (dims, values)
7370	return
7371
7372	# NB: similar, more general logic, now exists in
7373	# variable.unstack_once; we could consider combining them at some
7374	# point.
7375
7376	shape = tuple(lev.size for lev in idx.levels)
7377	indexer = tuple(idx.codes)
7378
7379	# We already verified that the MultiIndex has all unique values, so
7380	# there are missing values if and only if the size of output arrays is
7381	# larger that the index.
7382	missing_values = math.prod(shape) > idx.shape[0]
7383
7384	for name, values in arrays:
7385	# NumPy indexing is much faster than using DataFrame.reindex() to
7386	# fill in missing values:
7387	# https://stackoverflow.com/a/35049899/809705
7388	if missing_values:
7389	dtype, fill_value = xrdtypes.maybe_promote(values.dtype)
7390	data = np.full(shape, fill_value, dtype)
7391	else:
7392	# If there are no missing values, keep the existing dtype
7393	# instead of promoting to support NA, e.g., keep integer
7394	# columns as integers.
7395	# TODO: consider removing this special case, which doesn't
7396	# exist for sparse=True.
7397	data = np.zeros(shape, values.dtype)
7398	data[indexer] = values
7399	self[name] = (dims, data)
7400
7401	@classmethod
7402	def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self:

from_dataframeMethod · 0.80

prodMethod · 0.45

no test coverage detected