Parse instruments, e.g. csi300.txt. Example: $ python collector.py parse_instruments --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data
(self)
| 202 | return df |
| 203 | |
def parse_instruments(self):
    """Parse instruments and write them to a file, e.g. csi300.txt.

    Takes the frame returned by ``get_new_companies()`` and walks the
    records returned by ``get_changes()`` from the latest date to the
    earliest, producing rows of (symbol, start date, end date). The result
    is passed through ``format_datetime`` and written as a tab-separated
    file without header or index to
    ``self.instruments_dir / "<index_name.lower()>.txt"``.

    Raises
    ------
    ValueError
        If ``get_new_companies()`` returns ``None`` or an empty DataFrame.

    Examples
    -------
        $ python collector.py parse_instruments --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data
    """
    index_name = self.index_name.lower()
    logger.info(f"start parse {index_name} companies.....")
    instruments_columns = [self.SYMBOL_FIELD_NAME, self.START_DATE_FIELD, self.END_DATE_FIELD]
    changers_df = self.get_changes()
    new_df = self.get_new_companies()
    if new_df is None or new_df.empty:
        raise ValueError(f"get new companies error: {self.index_name}")
    # Work on a private copy so the frame owned by get_new_companies() is never mutated.
    new_df = new_df.copy()
    logger.info("parse history companies by changes......")
    sorted_changes = changers_df.sort_values(self.DATE_FIELD_NAME, ascending=False)
    for _row in tqdm(sorted_changes.itertuples(index=False)):
        if _row.type == self.ADD:
            # An ADD record sets the start date of this symbol's row with the smallest end date.
            is_symbol = new_df[self.SYMBOL_FIELD_NAME] == _row.symbol
            min_end_date = new_df.loc[is_symbol, self.END_DATE_FIELD].min()
            new_df.loc[
                (new_df[self.END_DATE_FIELD] == min_end_date) & is_symbol,
                self.START_DATE_FIELD,
            ] = _row.date
        else:
            # Any other record appends a row ending on the change date. The start
            # defaults to bench_start_date and may be overwritten by an ADD record
            # processed later in this loop, so the concat must stay inside the loop.
            _tmp_df = pd.DataFrame([[_row.symbol, self.bench_start_date, _row.date]], columns=instruments_columns)
            new_df = pd.concat([new_df, _tmp_df], sort=False)

    # Explicit copy: the column assignment below must never write into (or warn
    # about writing into) a selection of new_df.
    inst_df = new_df.loc[:, instruments_columns].copy()
    _inst_prefix = self.INST_PREFIX.strip()
    if _inst_prefix:
        inst_df["save_inst"] = inst_df[self.SYMBOL_FIELD_NAME].apply(lambda x: f"{_inst_prefix}{x}")
    inst_df = self.format_datetime(inst_df)
    # header=False is the documented way to omit the header row.
    inst_df.to_csv(self.instruments_dir.joinpath(f"{index_name}.txt"), sep="\t", index=False, header=False)
    logger.info(f"parse {index_name} companies finished.")
nothing calls this directly
no test coverage detected