| 460 | ) |
| 461 | |
def _load_all_source_data(self):
    """Read every file in ``self.df_files`` and return them as one DataFrame.

    Files are read concurrently by ``self.works`` worker threads. For each
    file, the ``self.date_field_name`` column (when present and not already
    a datetime column) is converted with ``pd.to_datetime``, and the
    ``self.symbol_field_name`` column is filled from
    ``self.get_symbol_from_file(file_path)`` when the file does not have it.
    Empty frames are dropped.

    NOTE: every loaded frame is kept in memory until the final concat, so
    peak memory grows with the total size of the source data.

    Returns:
        The concatenation (``sort=False``) of all non-empty frames, in the
        order of ``self.df_files``.

    Raises:
        ValueError: propagated from ``pd.concat`` when no non-empty frame
            was loaded.
    """
    logger.info("start load all source data....")
    all_df = []

    def _read_df(file_path: Path):
        # Load one file and normalise its date / symbol columns.
        _df = read_as_df(file_path)
        # Use the pandas dtype helper rather than np.issubdtype: NumPy raises
        # TypeError when handed a pandas extension dtype (tz-aware datetime,
        # categorical, string), which would abort the whole load.
        if self.date_field_name in _df.columns and not pd.api.types.is_datetime64_any_dtype(
            _df[self.date_field_name]
        ):
            _df[self.date_field_name] = pd.to_datetime(_df[self.date_field_name])
        if self.symbol_field_name not in _df.columns:
            _df[self.symbol_field_name] = self.get_symbol_from_file(file_path)
        return _df

    with tqdm(total=len(self.df_files)) as p_bar, ThreadPoolExecutor(max_workers=self.works) as executor:
        # executor.map yields results in input order, so the progress bar
        # advances once per file as each result is consumed.
        for df in executor.map(_read_df, self.df_files):
            if not df.empty:
                all_df.append(df)
            p_bar.update()

    logger.info("end of load all data.\n")
    return pd.concat(all_df, sort=False)
| 486 | |
| 487 | def _dump_calendars(self): |
| 488 | pass |