Grouper object for grouping by a categorical variable. Parameters ---------- labels: array-like, optional Group labels to aggregate on. This is required when grouping by a chunked array type (e.g. dask or cubed) since it is used to construct the coordinate on the output.
| 208 | |
| 209 | @dataclass |
| 210 | class UniqueGrouper(Grouper): |
| 211 | """ |
| 212 | Grouper object for grouping by a categorical variable. |
| 213 | |
| 214 | Parameters |
| 215 | ---------- |
| 216 | labels: array-like, optional |
| 217 | Group labels to aggregate on. This is required when grouping by a chunked array type |
| 218 | (e.g. dask or cubed) since it is used to construct the coordinate on the output. |
| 219 | Grouped operations will only be run on the specified group labels. Any group that is not |
| 220 | present in ``labels`` will be ignored. |
| 221 | """ |
| 222 | |
| 223 | _group_as_index: pd.Index | None = field(default=None, repr=False, init=False) |
| 224 | labels: ArrayLike | None = field(default=None) |
| 225 | |
@property
def group_as_index(self) -> pd.Index:
    """
    Return ``self.group`` as a pandas Index, building it on first access.

    The result is stored on ``self._group_as_index`` and reused by later
    accesses. A one-dimensional group is converted with its own
    ``to_index()`` method; any other group is turned into a NumPy array,
    flattened with ``ravel()`` and wrapped in a ``pd.Index``.
    """
    cached = self._group_as_index
    if cached is not None:
        return cached

    grp = self.group
    if grp.ndim == 1:
        cached = grp.to_index()
    else:
        flattened = np.array(grp).ravel()
        cached = pd.Index(flattened)

    self._group_as_index = cached
    return cached
| 235 | |
def reset(self) -> Self:
    """
    Return a new grouper of the same type that keeps this one's ``labels``.

    The instance is rebuilt through the constructor. ``_group_as_index`` is
    not a constructor argument (``init=False``), so the cached index starts
    out as ``None`` on the returned object, while the user-supplied
    ``labels`` are carried over instead of being silently dropped.
    """
    return type(self)(labels=self.labels)
| 238 | |
def factorize(self, group: T_Group) -> EncodedGroups:
    """
    Encode ``group`` and return the resulting ``EncodedGroups``.

    ``group`` is stored on ``self.group`` before anything else happens.

    * If ``group.data`` is a chunked array and no ``labels`` were given,
      a ``ValueError`` is raised.
    * If ``labels`` were given, the work is delegated to
      ``self._factorize_given_labels``.
    * Otherwise ``self._factorize_dummy`` is used when the group's only
      dimension is named after the group itself and the group is either a
      ``_DummyGroup`` or has a unique, monotonic index; in every other case
      ``self._factorize_unique`` is used.
    """
    self.group = group

    # Inspect the data first, regardless of whether labels were supplied.
    chunked = is_chunked_array(group.data)
    have_labels = self.labels is not None

    if chunked and not have_labels:
        raise ValueError(
            "When grouping by a dask array, `labels` must be passed using "
            "a UniqueGrouper object."
        )
    if have_labels:
        return self._factorize_given_labels(group)

    # Accessing the property also populates the cached index.
    idx = self.group_as_index
    if isinstance(self.group, _DummyGroup):
        sorted_and_unique = True
    else:
        sorted_and_unique = idx.is_unique and (
            idx.is_monotonic_increasing or idx.is_monotonic_decreasing
        )
    grouped_by_own_dim = self.group.dims == (self.group.name,)

    if not (grouped_by_own_dim and sorted_and_unique):
        return self._factorize_unique()
    return self._factorize_dummy()
| 262 | |
| 263 | def _factorize_given_labels(self, group: T_Group) -> EncodedGroups: |
| 264 | codes = apply_ufunc( |
| 265 | _factorize_given_labels, |
| 266 | group, |
| 267 | kwargs={"labels": self.labels}, |
no outgoing calls
no test coverage detected
searching dependent graphs…