MCPcopy
hub / github.com/microsoft/qlib / dataset

Method dataset

qlib/data/data.py:1040–1137  ·  view source on GitHub ↗
(
        self,
        instruments,
        fields,
        start_time=None,
        end_time=None,
        freq="day",
        disk_cache=0,
        return_uri=False,
        inst_processors=[],
    )

Source from the content-addressed store, hash-verified

1038 self.queue = queue.Queue()
1039
1040 def dataset(
1041 self,
1042 instruments,
1043 fields,
1044 start_time=None,
1045 end_time=None,
1046 freq="day",
1047 disk_cache=0,
1048 return_uri=False,
1049 inst_processors=[],
1050 ):
1051 if Inst.get_inst_type(instruments) == Inst.DICT:
1052 get_module_logger("data").warning(
1053 "Getting features from a dict of instruments is not recommended because the features will not be "
1054 "cached! "
1055 "The dict of instruments will be cleaned every day."
1056 )
1057
1058 if disk_cache == 0:
1059 """
1060 Call the server to generate the expression cache.
1061 Then load the data from the expression cache directly.
1062 - default using multi-kernel method.
1063
1064 """
1065 self.conn.send_request(
1066 request_type="feature",
1067 request_content={
1068 "instruments": instruments,
1069 "fields": fields,
1070 "start_time": start_time,
1071 "end_time": end_time,
1072 "freq": freq,
1073 "disk_cache": 0,
1074 },
1075 msg_queue=self.queue,
1076 )
1077 feature_uri = self.queue.get(timeout=C["timeout"])
1078 if isinstance(feature_uri, Exception):
1079 raise feature_uri
1080 else:
1081 instruments_d = self.get_instruments_d(instruments, freq)
1082 column_names = self.get_column_names(fields)
1083 cal = Cal.calendar(start_time, end_time, freq)
1084 if len(cal) == 0:
1085 return pd.DataFrame(
1086 index=pd.MultiIndex.from_arrays([[], []], names=("instrument", "datetime")),
1087 columns=column_names,
1088 )
1089 start_time = cal[0]
1090 end_time = cal[-1]
1091
1092 data = self.dataset_processor(instruments_d, column_names, start_time, end_time, freq, inst_processors)
1093 if return_uri:
1094 return data, feature_uri
1095 else:
1096 return data
1097 else:

Callers

nothing calls this directly

Calls 9

get_inst_type — Method · 0.80
send_request — Method · 0.80
get_instruments_d — Method · 0.80
get_column_names — Method · 0.80
dataset_processor — Method · 0.80
get_data_uri — Method · 0.80
read_data_from_cache — Method · 0.80
get — Method · 0.45
calendar — Method · 0.45

Tested by

no test coverage detected