(
self,
data_storage,
selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None),
level: Union[str, int] = "datetime",
col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL,
squeeze: bool = False,
proc_func: Callable = None,
)
| 277 | ) |
| 278 | |
def _fetch_data(
    self,
    data_storage,
    selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None),
    level: Union[str, int] = "datetime",
    col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL,
    squeeze: bool = False,
    proc_func: Callable = None,
):
    """
    Fetch data from `data_storage` by index selector and column set.

    This method is extracted for sharing in subclasses.

    Parameters
    ----------
    data_storage :
        Either a `pd.DataFrame` or a `BaseHandlerStorage`; anything else is rejected.
    selector : Union[pd.Timestamp, slice, str, pd.Index]
        Index selector. A tuple/list is converted into a slice when `level` is not None
        (if the conversion is impossible, the selector is passed on unchanged).
    level : Union[str, int]
        Index level the selector applies to; forwarded to `fetch_df_by_index` /
        `data_storage.fetch`. When None, a tuple/list selector is passed on directly.
    col_set : Union[str, List[str]]
        Column set to select; forwarded to `fetch_df_by_col` / `data_storage.fetch`.
    squeeze : bool
        Whether to call `.squeeze()` on the result and, for a `str`/`pd.Timestamp`
        selector, drop the selected index level as well.
    proc_func : Callable
        Optional function applied to a copy of the index-selected data before the
        column selection. Only supported when `data_storage` is a `pd.DataFrame`.

    Returns
    -------
    The fetched data (after squeezing, this may no longer be a DataFrame).

    Raises
    ------
    ValueError
        If `proc_func` is given while `data_storage` is a `BaseHandlerStorage`.
    TypeError
        If `data_storage` is neither a `pd.DataFrame` nor a `BaseHandlerStorage`.
    """
    from .storage import BaseHandlerStorage  # pylint: disable=C0415

    # Following conflicts may occur
    # - Does ["20200101", "20210101"] mean selecting this slice or these two days?
    # To solve this issue
    #   - slices have higher priority (except when level is None)
    if isinstance(selector, (tuple, list)) and level is not None:
        # when level is None, the argument will be passed in directly
        # we don't have to convert it into slice
        try:
            selector = slice(*selector)
        except (TypeError, ValueError):
            # `slice()` raises TypeError (not ValueError) when it receives zero or more
            # than three arguments; fall back to using the selector as it is.
            get_module_logger("DataHandlerLP").info("Fail to converting to query to slice. It will used directly")

    if isinstance(data_storage, pd.DataFrame):
        data_df = data_storage
        if proc_func is not None:
            # FIXME: fetching by time first will be more friendly to `proc_func`
            # Copy in case of `proc_func` changing the data inplace....
            data_df = proc_func(fetch_df_by_index(data_df, selector, level, fetch_orig=self.fetch_orig).copy())
            data_df = fetch_df_by_col(data_df, col_set)
        else:
            # Fetch column first will be more friendly to SepDataFrame
            data_df = fetch_df_by_col(data_df, col_set)
            data_df = fetch_df_by_index(data_df, selector, level, fetch_orig=self.fetch_orig)
    elif isinstance(data_storage, BaseHandlerStorage):
        if proc_func is not None:
            raise ValueError(f"proc_func is not supported by the storage {type(data_storage)}")
        data_df = data_storage.fetch(selector=selector, level=level, col_set=col_set, fetch_orig=self.fetch_orig)
    else:
        # The accepted storage base class is BaseHandlerStorage (see the isinstance check above).
        raise TypeError(f"data_storage should be pd.DataFrame|BaseHandlerStorage, not {type(data_storage)}")

    if squeeze:
        # squeeze columns
        data_df = data_df.squeeze()
        # squeeze index
        # `squeeze()` may collapse a 1x1 result into a scalar, which has no index to reset.
        if isinstance(selector, (str, pd.Timestamp)) and hasattr(data_df, "reset_index"):
            data_df = data_df.reset_index(level=level, drop=True)
    return data_df
| 328 | |
| 329 | def get_cols(self, col_set=DataHandlerABC.CS_ALL) -> list: |
| 330 | """ |
no test coverage detected