| 194 | |
| 195 | |
class MinMaxNorm(Processor):
    """Min-max rescaling of a column group using statistics from a fit window.

    ``fit`` records the per-column NaN-ignoring minimum and maximum of the
    rows selected by ``slice(fit_start_time, fit_end_time)`` on the
    ``"datetime"`` index level; calling the instance then applies
    ``(x - min_val) / (max_val - min_val)`` to those columns.
    """

    def __init__(self, fit_start_time, fit_end_time, fields_group=None):
        """Store the fit window bounds and the column-group selector."""
        # NOTE: choosing the fit window correctly is critical.
        # `fit_end_time` **must not** cover any information from the test data,
        # otherwise the learned statistics leak it into the normalized features.
        self.fit_start_time = fit_start_time
        self.fit_end_time = fit_end_time
        self.fields_group = fields_group

    def fit(self, df: pd.DataFrame = None):
        """Learn per-column ``min_val`` / ``max_val`` from the fit window of ``df``.

        Sets ``self.min_val``, ``self.max_val``, ``self.ignore`` (mask of
        columns whose minimum equals their maximum) and ``self.cols``.
        """
        fit_window = slice(self.fit_start_time, self.fit_end_time)
        df = fetch_df_by_index(df, fit_window, level="datetime")
        # presumably resolves `fields_group` to the matching column labels of
        # `df` -- helper is defined elsewhere; verify against its implementation
        cols = get_group_columns(df, self.fields_group)

        self.min_val = np.nanmin(df[cols].values, axis=0)
        self.max_val = np.nanmax(df[cols].values, axis=0)
        self.ignore = self.min_val == self.max_val

        # Constant columns would give a zero denominator. Rather than branching
        # per column at transform time, give them min=0 and max=1 so the shared
        # formula `(x - min_val) / (max_val - min_val)` reduces to
        # `(x - 0) / (1 - 0)` and leaves their values unchanged.
        self.min_val[self.ignore] = 0
        self.max_val[self.ignore] = 1

        self.cols = cols

    def __call__(self, df):
        """Rescale ``self.cols`` of ``df`` in place with the fitted statistics.

        Requires ``fit`` to have been called first. Returns the same ``df``
        object that was passed in.
        """
        shifted = df[self.cols].values - self.min_val
        df.loc(axis=1)[self.cols] = shifted / (self.max_val - self.min_val)
        return df
| 226 | |
| 227 | |
| 228 | class ZScoreNorm(Processor): |
no outgoing calls