MCPcopy Index your code
hub / github.com/microsoft/qlib / _fetch_data

Method _fetch_data

qlib/data/dataset/handler.py:279–327  ·  view source on GitHub ↗
(
        self,
        data_storage,
        selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None),
        level: Union[str, int] = "datetime",
        col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL,
        squeeze: bool = False,
        proc_func: Callable = None,
    )

Source from the content-addressed store, hash-verified

277 )
278
def _fetch_data(
    self,
    data_storage,
    selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None),
    level: Union[str, int] = "datetime",
    col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL,
    squeeze: bool = False,
    proc_func: Callable = None,
):
    """
    Fetch data from `data_storage` by index selector and column set.

    This method is extracted for sharing in subclasses.

    Parameters
    ----------
    data_storage : pd.DataFrame or BaseHandlerStorage
        The container to fetch the data from.
    selector : Union[pd.Timestamp, slice, str, pd.Index]
        Describes how to select data by index.
        A tuple or list is converted to a slice via ``slice(*selector)`` when
        `level` is not None; if that conversion fails, the selector is used as given.
    level : Union[str, int]
        The index level the selector is applied to.
        When it is None, `selector` is passed through without any conversion.
    col_set : Union[str, List[str]]
        The column set to select; forwarded to `fetch_df_by_col` or to
        `data_storage.fetch`.
    squeeze : bool
        Whether to call ``squeeze()`` on the result. If `selector` is a single
        `str` or `pd.Timestamp`, the `level` index level is dropped as well.
    proc_func : Callable
        Optional function applied to a copy of the index-selected data before the
        columns are selected. Only supported when `data_storage` is a pd.DataFrame.

    Returns
    -------
    The fetched data (whatever ``squeeze()`` reduces it to when `squeeze` is True).

    Raises
    ------
    ValueError
        If `proc_func` is given while `data_storage` is a BaseHandlerStorage.
    TypeError
        If `data_storage` is neither a pd.DataFrame nor a BaseHandlerStorage.
    """
    from .storage import BaseHandlerStorage  # pylint: disable=C0415

    # Following conflicts may occur
    # - Does ["20200101", "20210101"] mean selecting this slice or these two days?
    # To solve this issue
    #   - slice have higher priorities (except when level is none)
    if isinstance(selector, (tuple, list)) and level is not None:
        # when level is None, the argument will be passed in directly
        # we don't have to convert it into slice
        try:
            selector = slice(*selector)
        except (TypeError, ValueError):
            # `slice()` raises TypeError (not ValueError) when it gets zero or more
            # than three arguments, so TypeError must be caught for the fallback
            # below to be reachable at all.
            get_module_logger("DataHandlerLP").info("Fail to converting to query to slice. It will used directly")

    if isinstance(data_storage, pd.DataFrame):
        data_df = data_storage
        if proc_func is not None:
            # FIXME: fetching by time first will be more friendly to `proc_func`
            # Copy in case of `proc_func` changing the data inplace....
            data_df = proc_func(fetch_df_by_index(data_df, selector, level, fetch_orig=self.fetch_orig).copy())
            data_df = fetch_df_by_col(data_df, col_set)
        else:
            # Fetch column first will be more friendly to SepDataFrame
            data_df = fetch_df_by_col(data_df, col_set)
            data_df = fetch_df_by_index(data_df, selector, level, fetch_orig=self.fetch_orig)
    elif isinstance(data_storage, BaseHandlerStorage):
        if proc_func is not None:
            raise ValueError(f"proc_func is not supported by the storage {type(data_storage)}")
        data_df = data_storage.fetch(selector=selector, level=level, col_set=col_set, fetch_orig=self.fetch_orig)
    else:
        # The message names the type that is actually checked above.
        raise TypeError(f"data_storage should be pd.DataFrame|BaseHandlerStorage, not {type(data_storage)}")

    if squeeze:
        # squeeze columns
        data_df = data_df.squeeze()
        # squeeze index
        if isinstance(selector, (str, pd.Timestamp)):
            data_df = data_df.reset_index(level=level, drop=True)
    return data_df
328
329 def get_cols(self, col_set=DataHandlerABC.CS_ALL) -> list:
330 """

Callers 2

fetchMethod · 0.95
fetchMethod · 0.80

Calls 5

fetch_df_by_indexFunction · 0.85
fetch_df_by_colFunction · 0.85
copyMethod · 0.80
infoMethod · 0.45
fetchMethod · 0.45

Tested by

no test coverage detected