(riskdata_root="./riskdata", T=240, start_time="2016-01-01")
| 9 | |
| 10 | |
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
    """Run the risk model over a rolling window and pickle its outputs per date.

    Close prices are loaded from ``start_time`` onward for the "csi300"
    instruments (used to pick the codes present on each date) and for "all"
    instruments (used as the price panel). For every position in the price
    panel's index from ``T - 1`` onward, the prices between the entry
    ``T - 1`` positions earlier and the current date are turned into
    percentage returns, clipped per instrument at their 2.5% / 97.5%
    quantiles, and passed to ``StructuredCovEstimator.predict``.

    Three pickles are written under ``<riskdata_root>/<YYYYMMDD>/``:
    ``factor_exp.pkl``, ``factor_cov.pkl`` and ``specific_risk.pkl``.

    Args:
        riskdata_root: Directory under which the per-date folders are created.
        T: Number of index entries spanned by each estimation window.
        start_time: Earliest date requested from the data provider.
    """
    csi300_close = D.features(D.instruments("csi300"), ["$close"], start_time=start_time)
    # Swap the index levels and sort so that `.loc[date]` below selects one date's codes.
    universe = csi300_close.swaplevel().sort_index()

    all_close = D.features(D.instruments("all"), ["$close"], start_time=start_time)
    # Reshape to a wide panel: one row per date, one column per instrument.
    price_all = all_close.squeeze().unstack(level="instrument")

    # StructuredCovEstimator is a statistical risk model
    riskmodel = StructuredCovEstimator()

    trade_dates = price_all.index
    for end_pos in range(T - 1, len(price_all)):
        date = trade_dates[end_pos]
        ref_date = trade_dates[end_pos - T + 1]

        print(date)

        # Restrict the window to the instruments in the universe on `date`.
        codes = universe.loc[date].index
        window_price = price_all.loc[ref_date:date, codes]

        # Compute returns, then clip extremes column by column (per instrument).
        ret = window_price.pct_change()
        lower_bound = ret.quantile(0.025)
        upper_bound = ret.quantile(0.975)
        ret = ret.clip(lower=lower_bound, upper=upper_bound, axis=1)

        # Run the risk model on returns (not prices) and ask for the decomposed components.
        factor_exp, factor_cov, specific_var = riskmodel.predict(
            ret,
            is_price=False,
            return_decomposed_components=True,
        )

        # Save the risk data in a folder named after the date.
        root = riskdata_root + "/" + date.strftime("%Y%m%d")
        os.makedirs(root, exist_ok=True)

        exposure_frame = pd.DataFrame(factor_exp, index=codes)
        exposure_frame.to_pickle(root + "/factor_exp.pkl")

        covariance_frame = pd.DataFrame(factor_cov)
        covariance_frame.to_pickle(root + "/factor_cov.pkl")

        # For specific_risk we follow the convention of saving volatility,
        # i.e. the square root of the variance returned by the model.
        specific_vol = pd.Series(np.sqrt(specific_var), index=codes)
        specific_vol.to_pickle(root + "/specific_risk.pkl")
| 45 | |
| 46 | |
| 47 | if __name__ == "__main__": |
no test coverage detected