| 77 | self.file_suffix = file_suffix |
| 78 | |
def _compare(self, file_path: Path):
    """Compare one source csv file with the qlib features of the same symbol.

    The symbol is derived from the file name by removing ``self.file_suffix``.
    The csv is loaded, indexed by (symbol, date), aligned to the index of the
    frame returned by ``D.features`` and then compared with ``datacompy``.

    Parameters
    ----------
    file_path : Path
        Path of the csv file to check; its name is expected to be
        ``<symbol><file_suffix>``.

    Returns
    -------
    One of ``self.NOT_IN_FEATURES`` (lower-cased symbol is not in
    ``self.qlib_symbols``), ``self.COMPARE_TRUE``, ``self.COMPARE_FALSE`` or
    ``self.COMPARE_ERROR`` (the datacompy comparison raised).
    """
    # ``str.strip`` removes a *set of characters* from both ends, not a
    # suffix: "csco.csv".strip(".csv") == "o".  Slice the suffix off instead,
    # and only when the name really ends with it (an empty suffix would
    # otherwise slice the whole name away).
    file_name = file_path.name
    suffix = self.file_suffix
    if suffix and file_name.endswith(suffix):
        symbol = file_name[: -len(suffix)]
    else:
        symbol = file_name
    if symbol.lower() not in self.qlib_symbols:
        return self.NOT_IN_FEATURES
    # qlib data
    qlib_df = D.features([symbol], self.qlib_fields, freq=self.freq)
    # drop the "$" markers so the qlib column names can line up with the csv columns
    qlib_df.rename(columns={_c: _c.strip("$") for _c in qlib_df.columns}, inplace=True)
    # csv data
    origin_df = pd.read_csv(file_path)
    origin_df[self.date_field_name] = pd.to_datetime(origin_df[self.date_field_name])
    if self.symbol_field_name not in origin_df.columns:
        # the csv carries no symbol column: fall back to the symbol from the file name
        origin_df[self.symbol_field_name] = symbol
    origin_df.set_index([self.symbol_field_name, self.date_field_name], inplace=True)
    # use the same index names and the same rows as the qlib frame before comparing
    origin_df.index.names = qlib_df.index.names
    origin_df = origin_df.reindex(qlib_df.index)
    try:
        compare = datacompy.Compare(
            origin_df,
            qlib_df,
            on_index=True,
            abs_tol=1e-08,  # Optional, defaults to 0
            rel_tol=1e-05,  # Optional, defaults to 0
            df1_name="Original",  # Optional, defaults to 'df1'
            df2_name="New",  # Optional, defaults to 'df2'
        )
        _r = compare.matches(ignore_extra_columns=True)
    except Exception as e:
        # deliberate best-effort: a failure on one symbol is reported as
        # COMPARE_ERROR instead of being raised to the caller
        logger.warning(f"{symbol} compare error: {e}")
        return self.COMPARE_ERROR
    return self.COMPARE_TRUE if _r else self.COMPARE_FALSE
| 109 | |
| 110 | def check(self): |
| 111 | """Check whether the bin file after ``dump_bin.py`` is executed is consistent with the original csv file data""" |