| 236 | self.fields_group = fields_group |
| 237 | |
def fit(self, df: pd.DataFrame = None):
    """Learn the per-column mean and standard deviation used for normalization.

    The frame is first restricted via ``fetch_df_by_index`` to the slice
    ``self.fit_start_time`` .. ``self.fit_end_time`` on the ``"datetime"``
    level, and the columns to process are chosen by
    ``get_group_columns(df, self.fields_group)``.
    NOTE(review): both helpers are defined outside this block; their exact
    selection semantics should be confirmed there.

    Parameters
    ----------
    df : pd.DataFrame
        Data to compute the statistics from.

    Side effects
    ------------
    Sets ``self.mean_train`` and ``self.std_train`` (NaN-ignoring statistics
    taken along axis 0 of the selected values), ``self.ignore`` (boolean mask
    of columns whose standard deviation is exactly 0) and ``self.cols``.
    """
    df = fetch_df_by_index(df, slice(self.fit_start_time, self.fit_end_time), level="datetime")
    cols = get_group_columns(df, self.fields_group)
    # Materialize the selected values once and reuse them for both statistics.
    values = df[cols].values
    self.mean_train = np.nanmean(values, axis=0)
    self.std_train = np.nanstd(values, axis=0)
    self.ignore = self.std_train == 0
    # Columns with zero standard deviation must not be rescaled. Rather than
    # special-casing them later, give them `std_train = 1` and `mean_train = 0`,
    # so the uniform formula `(x - mean_train) / std_train` reduces to
    # `(x - 0) / 1` and leaves those columns unchanged.
    self.std_train[self.ignore] = 1
    self.mean_train[self.ignore] = 0
    self.cols = cols
| 253 | |
| 254 | def __call__(self, df): |
| 255 | def normalize(x, mean_train=self.mean_train, std_train=self.std_train): |