MCPcopy Index your code
hub / github.com/pydata/xarray / factorize

Method factorize

xarray/groupers.py:424–470  ·  view source on GitHub ↗
(self, group: T_Group)

Source from the content-addressed store, hash-verified

422 return binned.codes.reshape(data.shape)
423
def factorize(self, group: T_Group) -> EncodedGroups:
    """Encode ``group`` as integer bin codes plus the coordinate for the bins.

    Parameters
    ----------
    group : T_Group
        The variable to bin. A ``_DummyGroup`` is first rebuilt as a
        ``DataArray`` from its ``data``, ``dims`` and ``name``.

    Returns
    -------
    EncodedGroups
        Holds the integer ``codes`` (named ``"<group.name>_bins"``), the
        ``full_index`` of all bin labels, the ``unique_coord`` variable and
        the coordinates derived from it.

    Raises
    ------
    ValueError
        If ``self.bins`` is an ``int`` while ``group`` is backed by a chunked
        array, or if the data is not chunked and every code equals ``-1``.
    """
    if isinstance(group, _DummyGroup):
        group = DataArray(group.data, dims=group.dims, name=group.name)

    chunked = is_chunked_array(group.data)
    if chunked and isinstance(self.bins, int):
        raise ValueError(
            f"Bin edges must be provided when grouping by chunked arrays. Received {self.bins=!r} instead"
        )

    codes = apply_ufunc(
        self._pandas_cut_wrapper,
        group,
        dask="parallelized",
        keep_attrs=True,
        output_dtypes=[np.int64],
    )
    # The emptiness check is only made for non-chunked data; chunked codes
    # are never inspected here.
    if not chunked:
        if array_all(codes == -1):
            raise ValueError(
                f"None of the data falls within bins with edges {self.bins!r}"
            )

    bins_dim = f"{group.name}_bins"
    codes.name = bins_dim

    # Explicit labels are used directly only when the bins are an
    # IntervalIndex (Pandas ignores labels in that case). Otherwise a
    # one-element dummy array is cut so that Pandas handles the complexity of
    # `labels`, `precision`, and `include_lowest` -- even for chunked groups.
    if self.labels is not None and isinstance(self.bins, pd.IntervalIndex):
        full_index = pd.Index(self.labels)
    else:
        probe, _ = self._cut(np.array([0]).astype(group.dtype))
        full_index = probe.categories

    if chunked:
        # The observed codes are not computed for chunked data, so every bin
        # is reported.
        unique_values = full_index
    else:
        seen_codes = np.sort(pd.unique(codes.data.ravel()))
        # Drop the -1 code before indexing into the bin labels.
        unique_values = full_index[seen_codes[seen_codes != -1]]

    unique_coord = Variable(dims=bins_dim, data=unique_values, attrs=group.attrs)
    return EncodedGroups(
        codes=codes,
        full_index=full_index,
        unique_coord=unique_coord,
        coords=coordinates_from_variable(unique_coord),
    )
471
472
473@dataclass(repr=False)

Callers

nothing calls this directly

Calls 9

_cut · Method · 0.95
DataArray · Class · 0.90
is_chunked_array · Function · 0.90
apply_ufunc · Function · 0.90
array_all · Function · 0.90
Variable · Class · 0.90
EncodedGroups · Class · 0.85
astype · Method · 0.45

Tested by

no test coverage detected