gen_dataset_cache .. note:: This function does not consider the cache read-write lock. Please acquire the lock outside this function. The cache consists of 3 parts (each followed by a typical filename). - index : cache/d41366901e25de3ec47297f12e2ba11d.index
(self, cache_path: Union[str, Path], instruments, fields, freq, inst_processors=[])
| 854 | return index_data |
| 855 | |
| 856 | def gen_dataset_cache(self, cache_path: Union[str, Path], instruments, fields, freq, inst_processors=[]): |
| 857 | """gen_dataset_cache |
| 858 | |
| 859 | .. note:: This function does not consider the cache read write lock. Please |
| 860 | acquire the lock outside this function |
| 861 | |
| 862 | The cache consists of 3 parts (each followed by a typical filename). |
| 863 | |
| 864 | - index : cache/d41366901e25de3ec47297f12e2ba11d.index |
| 865 | |
| 866 | - The content of the file may be in the following format (pandas.Series) |
| 867 | |
| 868 | .. code-block:: python |
| 869 | |
| 870 | start end |
| 871 | 1999-11-10 00:00:00 0 1 |
| 872 | 1999-11-11 00:00:00 1 2 |
| 873 | 1999-11-12 00:00:00 2 3 |
| 874 | ... |
| 875 | |
| 876 | .. note:: The start is closed. The end is open!!!!! |
| 877 | |
| 878 | - Each line contains two elements <start_index, end_index> with a timestamp as its index. |
| 879 | - It indicates the `start_index` (included) and `end_index` (excluded) of the data for `timestamp` |
| 880 | |
| 881 | - meta data: cache/d41366901e25de3ec47297f12e2ba11d.meta |
| 882 | |
| 883 | - data : cache/d41366901e25de3ec47297f12e2ba11d |
| 884 | |
| 885 | - This is a hdf file sorted by datetime |
| 886 | |
| 887 | :param cache_path: The path to store the cache. |
| 888 | :param instruments: The instruments to store the cache. |
| 889 | :param fields: The fields to store the cache. |
| 890 | :param freq: The freq to store the cache. |
| 891 | :param inst_processors: Instrument processors. |
| 892 | |
| 893 | :return: pd.DataFrame; the fields of the returned DataFrame are consistent with the parameters of the function. |
| 894 | """ |
| 895 | # get calendar |
| 896 | from .data import Cal # pylint: disable=C0415 |
| 897 | |
| 898 | cache_path = Path(cache_path) |
| 899 | _calendar = Cal.calendar(freq=freq) |
| 900 | self.logger.debug(f"Generating dataset cache {cache_path}") |
| 901 | # Make sure the cache runs right when the directory is deleted |
| 902 | # while running |
| 903 | self.clear_cache(cache_path) |
| 904 | |
| 905 | features = self.provider.dataset( |
| 906 | instruments, fields, _calendar[0], _calendar[-1], freq, inst_processors=inst_processors |
| 907 | ) |
| 908 | |
| 909 | if features.empty: |
| 910 | return features |
| 911 | |
| 912 | # swap index and sorted |
| 913 | features = features.swaplevel("instrument", "datetime").sort_index() |
no test coverage detected