MCPcopy
hub / github.com/pydata/xarray / UniqueGrouper

Class UniqueGrouper

xarray/groupers.py:210–340  ·  view source on GitHub ↗

Grouper object for grouping by a categorical variable. Parameters: labels (array-like, optional): Group labels to aggregate on. This is required when grouping by a chunked array type (e.g. dask or cubed) since it is used to construct the coordinate on the output.

Source from the content-addressed store, hash-verified

208
209@dataclass
210class UniqueGrouper(Grouper):
211 """
212 Grouper object for grouping by a categorical variable.
213
214 Parameters
215 ----------
216 labels: array-like, optional
217 Group labels to aggregate on. This is required when grouping by a chunked array type
218 (e.g. dask or cubed) since it is used to construct the coordinate on the output.
219 Grouped operations will only be run on the specified group labels. Any group that is not
220 present in ``labels`` will be ignored.
221 """
222
223 _group_as_index: pd.Index | None = field(default=None, repr=False, init=False)
224 labels: ArrayLike | None = field(default=None)
225
@property
def group_as_index(self) -> pd.Index:
    """Return the group as a pandas Index, building and caching it on first access."""
    cached = self._group_as_index
    if cached is not None:
        return cached

    grp = self.group
    # A 1-D group converts directly; anything else is flattened first.
    cached = grp.to_index() if grp.ndim == 1 else pd.Index(np.array(grp).ravel())
    self._group_as_index = cached
    return cached
235
def reset(self) -> Self:
    """Return a fresh instance of the same class without the cached index.

    ``labels`` is user-supplied configuration, so it is carried over to
    the new instance. The cached ``_group_as_index`` is not passed on, so
    the new instance starts with its default value.
    """
    return type(self)(labels=self.labels)
238
def factorize(self, group: T_Group) -> EncodedGroups:
    """Store ``group`` on the grouper and return its ``EncodedGroups``.

    Dispatches to one of three private strategies:

    * ``_factorize_given_labels`` when ``labels`` was supplied;
    * ``_factorize_dummy`` when the group is its own dimension and its
      index is unique and monotonic (or the group is a ``_DummyGroup``);
    * ``_factorize_unique`` otherwise.

    Raises
    ------
    ValueError
        If ``group.data`` is a chunked array and no ``labels`` were given.
    """
    self.group = group

    # Inspect the data first, matching the order of the checks below.
    chunked = is_chunked_array(group.data)
    has_labels = self.labels is not None

    if chunked and not has_labels:
        raise ValueError(
            "When grouping by a dask array, `labels` must be passed using "
            "a UniqueGrouper object."
        )
    if has_labels:
        return self._factorize_given_labels(group)

    idx = self.group_as_index
    grp = self.group
    if isinstance(grp, _DummyGroup):
        sorted_and_unique = True
    else:
        sorted_and_unique = idx.is_unique and (
            idx.is_monotonic_increasing or idx.is_monotonic_decreasing
        )
    # True when the group's only dimension is named after the group itself.
    on_own_dimension = grp.dims == (grp.name,)

    if on_own_dimension and sorted_and_unique:
        return self._factorize_dummy()
    return self._factorize_unique()
262
263 def _factorize_given_labels(self, group: T_Group) -> EncodedGroups:
264 codes = apply_ufunc(
265 _factorize_given_labels,
266 group,
267 kwargs={"labels": self.labels},

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…