MCPcopy
hub / github.com/microsoft/qlib / prepare_data

Function prepare_data

examples/portfolio/prepare_riskdata.py:11–44  ·  view source on GitHub ↗
(riskdata_root="./riskdata", T=240, start_time="2016-01-01")

Source from the content-addressed store, hash-verified

9
10
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
    """Estimate and pickle rolling risk-model data for the CSI300 universe.

    For every row of the all-instrument close-price table from position
    ``T - 1`` onward, the prices between that date and the date ``T - 1``
    index positions earlier are converted to returns, clipped per instrument
    at the 2.5% / 97.5% quantiles, and passed to ``StructuredCovEstimator``.
    The three decomposed components are written under
    ``<riskdata_root>/<YYYYMMDD>/`` as ``factor_exp.pkl``, ``factor_cov.pkl``
    and ``specific_risk.pkl``.

    Args:
        riskdata_root: Directory under which one sub-directory per date is created.
        T: Length of the estimation window, in rows of the price table.
        start_time: Earliest date requested from the data provider.
    """
    # Membership of csi300 per date: after swaplevel/sort_index,
    # ``universe.loc[date]`` is indexed by the instruments present that day.
    csi300_close = D.features(D.instruments("csi300"), ["$close"], start_time=start_time)
    universe = csi300_close.swaplevel().sort_index()

    # Close prices of every instrument, one column per instrument.
    all_close = D.features(D.instruments("all"), ["$close"], start_time=start_time)
    price_all = all_close.squeeze().unstack(level="instrument")

    # StructuredCovEstimator is a statistical risk model
    riskmodel = StructuredCovEstimator()

    trading_days = price_all.index
    for end_pos in range(T - 1, len(price_all)):
        date = trading_days[end_pos]
        ref_date = trading_days[end_pos - T + 1]

        print(date)

        # Restrict the price window to the instruments in the universe on `date`.
        codes = universe.loc[date].index
        price = price_all.loc[ref_date:date, codes]

        # Compute returns, then clip extreme values column by column.
        ret = price.pct_change()
        lower_bound = ret.quantile(0.025)
        upper_bound = ret.quantile(0.975)
        ret = ret.clip(lower_bound, upper_bound, axis=1)

        # Run the risk model on returns (not prices) and ask for its components.
        F, cov_b, var_u = riskmodel.predict(ret, is_price=False, return_decomposed_components=True)

        # Persist the components in a per-date directory.
        root = riskdata_root + "/" + date.strftime("%Y%m%d")
        os.makedirs(root, exist_ok=True)

        factor_exposure = pd.DataFrame(F, index=codes)
        factor_exposure.to_pickle(root + "/factor_exp.pkl")

        factor_covariance = pd.DataFrame(cov_b)
        factor_covariance.to_pickle(root + "/factor_cov.pkl")

        # By convention specific risk is stored as volatility, hence the sqrt.
        specific_risk = pd.Series(np.sqrt(var_u), index=codes)
        specific_risk.to_pickle(root + "/specific_risk.pkl")
45
46
47if __name__ == "__main__":

Callers 1

Calls 7

featuresMethod · 0.80
clipMethod · 0.80
sort_indexMethod · 0.45
instrumentsMethod · 0.45
predictMethod · 0.45
to_pickleMethod · 0.45

Tested by

no test coverage detected