MCPcopy Index your code
hub / github.com/pydata/xarray / factorize

Method factorize

xarray/core/groupby.py:524–577  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

522 groupers: tuple[ResolvedGrouper, ...]
523
def factorize(self) -> EncodedGroups:
    """Collapse the codes of all groupers into a single set of flat codes.

    The integer codes of each grouper in ``self.groupers`` are broadcast
    against each other and raveled into one flat code per element, using
    the grouper sizes as the shape. Elements where any grouper has the
    code ``-1`` are given the flat code ``-1``.

    Returns
    -------
    EncodedGroups
        Carries the flat codes, the full product ``pd.MultiIndex`` of all
        grouper indexes, the per-group indices (``None`` when the flat
        codes are a chunked array), and coordinates built from the
        MultiIndex of groups.

    Raises
    ------
    ValueError
        If the product index built from the groupers is not unique.
    """
    from xarray.groupers import EncodedGroups

    resolved = self.groupers

    # Every grouper has already factorized its own array by this point.
    per_grouper_codes = tuple(g.codes for g in resolved)
    sizes = tuple(g.size for g in resolved)
    missing_flags = tuple((c == -1) for c in per_grouper_codes)

    # Broadcast the codes against each other; the first broadcast result is
    # reused further down as the template for the returned codes.
    aligned_codes = broadcast(*per_grouper_codes)
    template = aligned_codes[0]

    # Turn this into a single-variable GroupBy problem: one code per element.
    raw_arrays = tuple(c.data for c in aligned_codes)
    flat_codes = np.ravel_multi_index(raw_arrays, sizes, mode="wrap")

    # NaNs and values outside the bins are coded as -1 by the groupers.
    # Put that marker back now that the raveling is done.
    aligned_flags = broadcast(*missing_flags)
    missing = functools.reduce(np.logical_or, aligned_flags)  # type: ignore[arg-type]
    flat_codes = where(missing.data, -1, flat_codes)

    level_values = [list(g.full_index.values) for g in resolved]
    level_names = tuple(g.name for g in resolved)
    full_index = pd.MultiIndex.from_product(level_values, names=level_names)
    if not full_index.is_unique:
        raise ValueError(
            "The output index for the GroupBy is non-unique. "
            "This is a bug in the Grouper provided."
        )

    if is_chunked_array(flat_codes):
        # These go unused when grouping by chunked (e.g. dask) arrays, so
        # skip the work and fall back to the full product index.
        observed_index = full_index
        group_indices = None
    else:
        # The plain product is wrong when some groups are missing
        # (e.g. binning, resampling); keep only the codes actually present.
        present_codes = np.sort(pd.unique(flat_codes[~missing]))
        observed_index = full_index[present_codes]
        group_indices = _codes_to_group_indices(flat_codes.ravel(), len(full_index))

    joined_names = "_".join(str(g.name) for g in resolved)
    dim_name = f"stacked_{joined_names}"

    coords = Coordinates.from_pandas_multiindex(observed_index, dim=dim_name)
    # Carry each grouper's original attributes over to its coordinate.
    for g in resolved:
        coords.variables[g.name].attrs = g.group.attrs

    encoded_codes = template.copy(data=flat_codes)
    unique_coord = Variable(dims=(dim_name,), data=observed_index.values)
    return EncodedGroups(
        codes=encoded_codes,
        full_index=full_index,
        group_indices=group_indices,
        unique_coord=unique_coord,
        coords=coords,
    )
578
579
580class GroupBy(Generic[T_Xarray]):

Callers 4

to_indexMethod · 0.45
stackMethod · 0.45
__post_init__Method · 0.45
__init__Method · 0.45

Calls 10

broadcastFunction · 0.90
whereFunction · 0.90
is_chunked_arrayFunction · 0.90
EncodedGroupsClass · 0.90
VariableClass · 0.90
_codes_to_group_indicesFunction · 0.85
reduceMethod · 0.45
joinMethod · 0.45
copyMethod · 0.45

Tested by

no test coverage detected