(self)
| 522 | groupers: tuple[ResolvedGrouper, ...] |
| 523 | |
def factorize(self) -> EncodedGroups:
    """Collapse the codes of every grouper into one set of flat codes.

    The per-grouper codes are broadcast against each other and raveled
    into a single integer code per element, indexing into the product of
    all groupers' ``full_index`` values. Elements coded ``-1`` by any
    grouper are coded ``-1`` in the result.

    Returns
    -------
    EncodedGroups
        Holds the flat codes (on the broadcast template of the first
        grouper's codes), the product ``full_index``, the group indices
        (``None`` when the flat codes are a chunked array), and the
        stacked coordinate named ``"stacked_<name>_<name>..."``.

    Raises
    ------
    ValueError
        If the product of the groupers' full indexes is not unique.
    """
    from xarray.groupers import EncodedGroups

    members = self.groupers

    # Every grouper has already been factorized by the time we get here.
    per_grouper_codes = tuple(member.codes for member in members)
    sizes = tuple(member.size for member in members)

    # Broadcast the codes against each other; the first broadcast result
    # doubles as the template for the codes we return.
    aligned_codes = broadcast(*per_grouper_codes)
    template = aligned_codes[0]

    # Turn this into a single-variable GroupBy problem. mode="wrap" keeps
    # the -1 sentinels from raising; they are put back right below.
    flat_codes = np.ravel_multi_index(
        tuple(aligned.data for aligned in aligned_codes), sizes, mode="wrap"
    )

    # -1 encodes NaNs as well as values outside the bins. An element is
    # missing overall as soon as any one grouper marks it missing.
    missing_flags = tuple((code == -1) for code in per_grouper_codes)
    aligned_flags = broadcast(*missing_flags)
    missing = functools.reduce(np.logical_or, aligned_flags)  # type: ignore[arg-type]
    flat_codes = where(missing.data, -1, flat_codes)

    levels = [list(member.full_index.values) for member in members]
    level_names = tuple(member.name for member in members)
    full_index = pd.MultiIndex.from_product(levels, names=level_names)
    if not full_index.is_unique:
        raise ValueError(
            "The output index for the GroupBy is non-unique. "
            "This is a bug in the Grouper provided."
        )

    if is_chunked_array(flat_codes):
        # Neither value is used when grouping by dask arrays, so skip the
        # work of computing them.
        midx = full_index
        group_indices = None
    else:
        # The full product is wrong when some groups are missing
        # (e.g. binning, resampling); keep only the codes that occur.
        observed = np.sort(pd.unique(flat_codes[~missing]))
        midx = full_index[observed]
        group_indices = _codes_to_group_indices(flat_codes.ravel(), len(full_index))

    joined_names = "_".join(str(member.name) for member in members)
    dim_name = "stacked_" + joined_names

    coords = Coordinates.from_pandas_multiindex(midx, dim=dim_name)
    # Carry each group's attrs over to its level coordinate.
    for member in members:
        coords.variables[member.name].attrs = member.group.attrs

    stacked_codes = template.copy(data=flat_codes)
    unique_coord = Variable(dims=(dim_name,), data=midx.values)
    return EncodedGroups(
        codes=stacked_codes,
        full_index=full_index,
        group_indices=group_indices,
        unique_coord=unique_coord,
        coords=coords,
    )
| 578 | |
| 579 | |
| 580 | class GroupBy(Generic[T_Xarray]): |
no test coverage detected