fetch data from underlying data source Design motivation: - providing a unified interface for underlying data. - Potential to make the interface more friendly. - User can improve performance when fetching data in this extra layer Parameters
(
self,
selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None),
level: Union[str, int] = "datetime",
col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL,
data_key: DATA_KEY_TYPE = DataHandlerABC.DK_I,
squeeze: bool = False,
proc_func: Optional[Callable] = None,
)
| 196 | # TODO: cache |
| 197 | |
| 198 | def fetch( |
| 199 | self, |
| 200 | selector: Union[pd.Timestamp, slice, str, pd.Index] = slice(None, None), |
| 201 | level: Union[str, int] = "datetime", |
| 202 | col_set: Union[str, List[str]] = DataHandlerABC.CS_ALL, |
| 203 | data_key: DATA_KEY_TYPE = DataHandlerABC.DK_I, |
| 204 | squeeze: bool = False, |
| 205 | proc_func: Optional[Callable] = None, |
| 206 | ) -> pd.DataFrame: |
| 207 | """ |
| 208 | fetch data from underlying data source |
| 209 | |
| 210 | Design motivation: |
| 211 | - providing a unified interface for underlying data. |
| 212 | - Potential to make the interface more friendly. |
| 213 | - User can improve performance when fetching data in this extra layer |
| 214 | |
| 215 | Parameters |
| 216 | ---------- |
| 217 | selector : Union[pd.Timestamp, slice, str, pd.Index] |
| 218 | describe how to select data by index |
| 219 | It can be categorized as follows |
| 220 | |
| 221 | - fetch single index |
| 222 | - fetch a range of index |
| 223 | |
| 224 | - a slice range |
| 225 | - pd.Index for specific indexes |
| 226 | |
| 227 | Following conflicts may occur |
| 228 | |
| 229 | - Does ["20200101", "20210101"] mean selecting this slice or these two days? |
| 230 | |
| 231 | - slice has higher priority |
| 232 | |
| 233 | level : Union[str, int] |
| 234 | which index level to select the data |
| 235 | |
| 236 | col_set : Union[str, List[str]] |
| 237 | |
| 238 | - if isinstance(col_set, str): |
| 239 | |
| 240 | select a set of meaningful, pd.Index columns (e.g. features, columns). |
| 241 | |
| 242 | - if col_set == CS_RAW: |
| 243 | |
| 244 | the raw dataset will be returned. |
| 245 | |
| 246 | - if isinstance(col_set, List[str]): |
| 247 | |
| 248 | select several sets of meaningful columns, the returned data has multiple levels |
| 249 | |
| 250 | proc_func: Callable |
| 251 | |
| 252 | - Give a hook for processing data before fetching |
| 253 | - An example to explain the necessity of the hook: |
| 254 | |
| 255 | - A Dataset learned some processors to process data which is related to data segmentation |