hub / github.com/microsoft/qlib / _fetch_data

Method _fetch_data

qlib/data/dataset/handler.py:279–327 · view source on GitHub ↗

(
        self,
        data_storage,
        selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None),
        level: Union[str, int] = "datetime",
        col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL,
        squeeze: bool = False,
        proc_func: Callable = None,
    )

Source from the content-addressed store, hash-verified

277	)
278
def _fetch_data(
    self,
    data_storage,
    selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None),
    level: Union[str, int] = "datetime",
    col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL,
    squeeze: bool = False,
    proc_func: Callable = None,
):
    """Fetch a row/column selection from ``data_storage``.

    This method is extracted so that subclasses can share it.

    Parameters
    ----------
    data_storage : pd.DataFrame or BaseHandlerStorage
        The object to read from. Any other type raises ``TypeError``.
    selector : Union[pd.Timestamp, slice, str, pd.Index]
        Index selector. A ``tuple``/``list`` is converted with
        ``slice(*selector)`` when ``level`` is not None; if it cannot be
        converted it is used as given.
    level : Union[str, int]
        Index level the selector applies to. When it is None the selector is
        passed through without any conversion.
    col_set : Union[str, List[str]]
        Column set handed to ``fetch_df_by_col`` / ``data_storage.fetch``.
    squeeze : bool
        If True, call ``squeeze()`` on the result and, when ``selector`` is a
        single ``str`` or ``pd.Timestamp``, drop ``level`` from the index.
    proc_func : Callable
        Optional function applied to a copy of the row selection before the
        columns are selected. Only supported for ``pd.DataFrame`` storage.

    Returns
    -------
    The fetched data after the optional processing and squeezing.

    Raises
    ------
    ValueError
        If ``proc_func`` is given together with a ``BaseHandlerStorage``.
    TypeError
        If ``data_storage`` is neither a ``pd.DataFrame`` nor a
        ``BaseHandlerStorage``.
    """
    # Imported locally, as in the original implementation.
    from .storage import BaseHandlerStorage  # pylint: disable=C0415

    # Following conflicts may occur
    # - Does ["20200101", "20210101"] mean selecting this slice or these two days?
    # To solve this issue
    #   - slice have higher priorities (except when level is none)
    if isinstance(selector, (tuple, list)) and level is not None:
        # when level is None, the argument will be passed in directly
        # we don't have to convert it into slice
        try:
            selector = slice(*selector)
        except (TypeError, ValueError):
            # `slice()` raises TypeError (not ValueError) when it receives zero
            # or more than three arguments; catch it so that the documented
            # fallback of using the selector directly actually happens.
            get_module_logger("DataHandlerLP").info("Fail to converting to query to slice. It will used directly")

    if isinstance(data_storage, pd.DataFrame):
        data_df = data_storage
        if proc_func is not None:
            # FIXME: fetching by time first will be more friendly to `proc_func`
            # Copy in case of `proc_func` changing the data inplace....
            data_df = proc_func(fetch_df_by_index(data_df, selector, level, fetch_orig=self.fetch_orig).copy())
            data_df = fetch_df_by_col(data_df, col_set)
        else:
            # Fetch column first will be more friendly to SepDataFrame
            data_df = fetch_df_by_col(data_df, col_set)
            data_df = fetch_df_by_index(data_df, selector, level, fetch_orig=self.fetch_orig)
    elif isinstance(data_storage, BaseHandlerStorage):
        if proc_func is not None:
            raise ValueError(f"proc_func is not supported by the storage {type(data_storage)}")
        data_df = data_storage.fetch(selector=selector, level=level, col_set=col_set, fetch_orig=self.fetch_orig)
    else:
        # The accepted storage base class is BaseHandlerStorage; the message
        # previously contained an invalid `\|` escape sequence.
        raise TypeError(f"data_storage should be pd.DataFrame|BaseHandlerStorage, not {type(data_storage)}")

    if squeeze:
        # squeeze columns
        # NOTE(review): `squeeze()` collapses every length-1 axis, so the result
        # may no longer carry `level` in its index - confirm the callers only
        # rely on this for multi-row selections.
        data_df = data_df.squeeze()
        # squeeze index
        if isinstance(selector, (str, pd.Timestamp)):
            data_df = data_df.reset_index(level=level, drop=True)
    return data_df
328
329	def get_cols(self, col_set=DataHandlerABC.CS_ALL) -> list:
330	"""

Callers 2

fetchMethod · 0.95

fetchMethod · 0.80

Calls 5

fetch_df_by_indexFunction · 0.85

fetch_df_by_colFunction · 0.85

copyMethod · 0.80

infoMethod · 0.45

fetchMethod · 0.45

Tested by

no test coverage detected