MCPcopy
hub / github.com/microsoft/qlib / MinMaxNorm

Class MinMaxNorm

qlib/data/dataset/processor.py:196–225  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

194
195
class MinMaxNorm(Processor):
    """Rescale selected columns using min/max statistics learned in ``fit``.

    Every selected column is transformed as ``(x - min) / (max - min)``,
    where ``min`` and ``max`` are the NaN-ignoring column extrema observed
    in the fitting window. Columns whose minimum equals their maximum in
    that window are passed through unchanged.
    """

    def __init__(self, fit_start_time, fit_end_time, fields_group=None):
        """Store the fitting window and the column-group selector.

        Parameters
        ----------
        fit_start_time :
            Start of the datetime slice used by ``fit``.
        fit_end_time :
            End of the datetime slice used by ``fit``.
        fields_group :
            Forwarded to ``get_group_columns`` to choose the columns.
        """
        # NOTE: setting `fit_start_time` and `fit_end_time` correctly is very important!
        # `fit_end_time` **must not** include any information from the test data!
        self.fit_start_time = fit_start_time
        self.fit_end_time = fit_end_time
        self.fields_group = fields_group

    def fit(self, df: pd.DataFrame = None):
        """Learn per-column minima and maxima from the fitting window.

        Sets ``self.min_val``, ``self.max_val``, ``self.ignore`` and
        ``self.cols``.

        Parameters
        ----------
        df : pd.DataFrame
            Data to learn from. It is sliced on the ``"datetime"`` index
            level via ``fetch_df_by_index``
            (NOTE(review): slice boundary semantics live in that helper).
        """
        window = slice(self.fit_start_time, self.fit_end_time)
        df = fetch_df_by_index(df, window, level="datetime")
        selected = get_group_columns(df, self.fields_group)

        values = df[selected].values
        self.min_val = np.nanmin(values, axis=0)
        self.max_val = np.nanmax(values, axis=0)

        # Columns with min == max would produce a zero denominator. Rather
        # than special-casing them at transform time, give them min=0 and
        # max=1 so the shared formula `(x - min_val) / (max_val - min_val)`
        # reduces to `(x - 0) / (1 - 0)` and leaves those columns untouched.
        self.ignore = self.min_val == self.max_val
        self.min_val[self.ignore] = 0
        self.max_val[self.ignore] = 1

        self.cols = selected

    def __call__(self, df):
        """Apply the fitted scaling to ``self.cols`` of ``df`` in place.

        Parameters
        ----------
        df : pd.DataFrame
            Frame containing the columns recorded by ``fit``.

        Returns
        -------
        pd.DataFrame
            The same ``df`` object, with the selected columns overwritten.
        """
        # Read the fitted statistics first so an unfitted processor fails
        # before any data is accessed.
        lower, upper = self.min_val, self.max_val
        scaled = (df[self.cols].values - lower) / (upper - lower)
        df.loc(axis=1)[self.cols] = scaled
        return df
226
227
228class ZScoreNorm(Processor):

Callers 1

test_MinMaxNormMethod · 0.90

Calls

no outgoing calls

Tested by 1

test_MinMaxNormMethod · 0.72