MCPcopy
hub / github.com/pydata/xarray / factorize

Method factorize

xarray/groupers.py:916–1055  ·  view source on GitHub ↗
(self, group: T_Group)

Source from the content-addressed store, hash-verified

914 )
915
916 def factorize(self, group: T_Group) -> EncodedGroups:
917 if group.ndim != 1:
918 raise ValueError(
919 "SeasonResampler can only be used to resample by 1D arrays."
920 )
921 if not isinstance(group, DataArray) or not _contains_datetime_like_objects(
922 group.variable
923 ):
924 raise ValueError(
925 "SeasonResampler can only be used to group by datetime-like DataArrays."
926 )
927
928 seasons = self.seasons
929 season_inds = self.season_inds
930 season_tuples = self.season_tuples
931
932 nstr = max(len(s) for s in seasons)
933 year = group.dt.year.astype(int)
934 month = group.dt.month.astype(int)
935 season_label = np.full(group.shape, "", dtype=f"U{nstr}")
936
937 # offset years for seasons with December and January
938 for season_str, season_ind in zip(seasons, season_inds, strict=True):
939 season_label[month.isin(season_ind)] = season_str
940 if "DJ" in season_str:
941 after_dec = season_ind[season_str.index("D") + 1 :]
942 # important: this is assuming non-overlapping seasons
943 year[month.isin(after_dec)] -= 1
944
945 # Allow users to skip one or more months?
946 # present_seasons is a mask that is True for months that are requested in the output
947 present_seasons = season_label != ""
948 if present_seasons.all():
949 # avoid copies if we can.
950 present_seasons = slice(None)
951 frame = pd.DataFrame(
952 data={
953 "index": np.arange(group[present_seasons].size),
954 "month": month[present_seasons],
955 },
956 index=pd.MultiIndex.from_arrays(
957 [year.data[present_seasons], season_label[present_seasons]],
958 names=["year", "season"],
959 ),
960 )
961
962 agged = (
963 frame["index"]
964 .groupby(["year", "season"], sort=False)
965 .agg(["first", "count"])
966 )
967 first_items = agged["first"]
968 counts = agged["count"]
969
970 index_class: type[CFTimeIndex | pd.DatetimeIndex]
971 datetime_class: CFTimeDatetime | Callable[..., np.datetime64]
972 if _contains_cftime_datetimes(group.data):
973 index_class = CFTimeIndex

Callers

nothing calls this directly

Calls 13

first_n_items (Function) · 0.90
type (Function) · 0.85
EncodedGroups (Class) · 0.85
isin (Method) · 0.80
arange (Method) · 0.80
repeat (Method) · 0.80
astype (Method) · 0.45
index (Method) · 0.45
all (Method) · 0.45
groupby (Method) · 0.45

Tested by

no test coverage detected