Method fit

qlib/data/dataset/processor.py:204–218 · view source on GitHub ↗

(self, df: pd.DataFrame = None)

Source from the content-addressed store, hash-verified

202	self.fields_group = fields_group
203
def fit(self, df: pd.DataFrame = None):
    """Record per-column minimum and maximum values from the fitting window.

    ``df`` is first passed through ``fetch_df_by_index`` with the slice
    ``self.fit_start_time``..``self.fit_end_time`` on the ``"datetime"``
    level, and the columns to process are chosen by ``get_group_columns``
    from ``self.fields_group``.

    Attributes set on ``self``:
        min_val: NaN-ignoring minimum of each selected column (``np.nanmin``).
        max_val: NaN-ignoring maximum of each selected column (``np.nanmax``).
        ignore: boolean mask, True where a column's minimum equals its maximum.
        cols: the selected columns.

    Parameters
    ----------
    df : pd.DataFrame
        Data to compute the statistics from.
    """
    df = fetch_df_by_index(df, slice(self.fit_start_time, self.fit_end_time), level="datetime")
    cols = get_group_columns(df, self.fields_group)
    # Extract the selected columns once; both reductions read the same array.
    values = df[cols].values
    self.min_val = np.nanmin(values, axis=0)
    self.max_val = np.nanmax(values, axis=0)
    self.ignore = self.min_val == self.max_val
    # Columns whose min equals max would make `max_val - min_val` zero. To keep
    # a single uniform `(x - min_val) / (max_val - min_val)` computation, such
    # columns get `min_val = 0` and `max_val = 1`, so they are evaluated as
    # `(x - 0) / (1 - 0)` and pass through unchanged. A boolean-mask assignment
    # applies this to every flagged column at once.
    self.min_val[self.ignore] = 0
    self.max_val[self.ignore] = 1
    self.cols = cols
219
220	def __call__(self, df):
221	def normalize(x, min_val=self.min_val, max_val=self.max_val):

test_MinMaxNormMethod · 0.95

fetch_df_by_index · Function · 0.85

get_group_columns · Function · 0.85

test_MinMaxNormMethod · 0.76