MCPcopy
hub / github.com/microsoft/qlib / _load_all_source_data

Method _load_all_source_data

scripts/dump_bin.py:462–485  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

460 )
461
def _load_all_source_data(self):
    """Read every source file concurrently and merge the results.

    Each path in ``self.df_files`` is read with ``read_as_df`` on a thread
    pool of ``self.works`` workers. For every frame read:

    * if a ``self.date_field_name`` column is present and is not already a
      datetime column, it is converted with ``pd.to_datetime``;
    * if the ``self.symbol_field_name`` column is missing, it is filled
      with ``self.get_symbol_from_file(file_path)``.

    Empty frames are dropped before merging.

    Returns:
        The row-wise ``pd.concat(..., sort=False)`` of all non-empty frames,
        in the order of ``self.df_files``.

    Raises:
        ValueError: If no file produced a non-empty frame.
    """
    # NOTE: Need more memory -- every frame is kept in the list below until
    # the final concat.
    logger.info("start load all source data....")

    def _read_df(file_path: Path):
        frame = read_as_df(file_path)
        date_col = self.date_field_name
        # is_datetime64_any_dtype understands pandas extension dtypes
        # (tz-aware datetimes, categoricals, strings, ...), whereas
        # np.issubdtype raises TypeError on them.
        if date_col in frame.columns and not pd.api.types.is_datetime64_any_dtype(
            frame[date_col]
        ):
            frame[date_col] = pd.to_datetime(frame[date_col])
        if self.symbol_field_name not in frame.columns:
            frame[self.symbol_field_name] = self.get_symbol_from_file(file_path)
        return frame

    loaded = []
    with tqdm(total=len(self.df_files)) as p_bar:
        with ThreadPoolExecutor(max_workers=self.works) as executor:
            # executor.map yields results in input order, so the merged
            # frame follows the order of self.df_files.
            for frame in executor.map(_read_df, self.df_files):
                if not frame.empty:
                    loaded.append(frame)
                # Advance once per file, whether or not it had rows.
                p_bar.update()

    logger.info("end of load all data.\n")
    if not loaded:
        # pd.concat rejects an empty list with a generic ValueError; raise
        # the same exception type with a message that names the cause.
        raise ValueError(
            f"no non-empty source data loaded from {len(self.df_files)} file(s)"
        )
    return pd.concat(loaded, sort=False)
486
487 def _dump_calendars(self):
488 pass

Callers 1

__init__Method · 0.95

Calls 2

infoMethod · 0.45
updateMethod · 0.45

Tested by

no test coverage detected