(self, group: T_Group)
| 422 | return binned.codes.reshape(data.shape) |
| 423 | |
def factorize(self, group: T_Group) -> EncodedGroups:
    """Bin ``group`` and encode every element as an integer bin code.

    Parameters
    ----------
    group
        The variable to bin. A ``_DummyGroup`` is first converted into a
        ``DataArray`` carrying the same data, dims and name.

    Returns
    -------
    EncodedGroups
        Holds the integer ``codes`` (named ``"<group.name>_bins"``), the
        ``full_index`` of all bins, the ``unique_coord`` variable along the
        new bins dimension, and coordinates built from that variable.

    Raises
    ------
    ValueError
        If ``self.bins`` is an ``int`` while ``group`` is backed by a
        chunked array, or if ``group`` is not chunked and every code
        equals ``-1``.
    """
    if isinstance(group, _DummyGroup):
        group = DataArray(group.data, dims=group.dims, name=group.name)

    is_lazy = is_chunked_array(group.data)
    if is_lazy and isinstance(self.bins, int):
        raise ValueError(
            f"Bin edges must be provided when grouping by chunked arrays. Received {self.bins=!r} instead"
        )

    codes = apply_ufunc(
        self._pandas_cut_wrapper,
        group,
        dask="parallelized",
        keep_attrs=True,
        output_dtypes=[np.int64],
    )

    # The all-missing check is only performed for in-memory data; it is
    # skipped entirely for chunked input.
    if not is_lazy:
        if array_all(codes == -1):
            raise ValueError(
                f"None of the data falls within bins with edges {self.bins!r}"
            )

    bins_dim = f"{group.name}_bins"
    codes.name = bins_dim

    # Pandas ignores `labels` when an IntervalIndex is passed as bins, so in
    # that one case the labels are used directly as the index. Otherwise a
    # one-element dummy array is cut: it looks odd, but it leaves `labels`,
    # `precision` and `include_lowest` handling to Pandas even when the
    # group is a chunked array.
    labels_override = self.labels is not None and isinstance(
        self.bins, pd.IntervalIndex
    )
    if labels_override:
        full_index = pd.Index(self.labels)
    else:
        probe, _ = self._cut(np.array([0]).astype(group.dtype))
        full_index = probe.categories

    if is_lazy:
        # Codes are not inspected for chunked input: report every bin.
        unique_values = full_index
    else:
        seen_codes = np.sort(pd.unique(codes.data.ravel()))
        occupied = seen_codes[seen_codes != -1]
        unique_values = full_index[occupied]

    unique_coord = Variable(
        dims=bins_dim, data=unique_values, attrs=group.attrs
    )
    coords = coordinates_from_variable(unique_coord)
    return EncodedGroups(
        codes=codes,
        full_index=full_index,
        unique_coord=unique_coord,
        coords=coords,
    )
| 471 | |
| 472 | |
| 473 | @dataclass(repr=False) |
nothing calls this directly
no test coverage detected