MCPcopy
hub / github.com/microsoft/qlib / check_missing_data

Method check_missing_data

scripts/check_data_health.py:71–96  ·  view source on GitHub ↗

Check if any data is missing in the DataFrame.

(self)

Source from the content-addressed store, hash-verified

69 print(df)
70
def check_missing_data(self) -> Optional[pd.DataFrame]:
    """Report instruments whose null count exceeds ``self.missing_data_num``.

    For every ``(filename, df)`` pair in ``self.data`` the per-column null
    count is computed once. An instrument is flagged when *any* column of
    its DataFrame (not only the OHLCV ones) has more than
    ``self.missing_data_num`` nulls; the null counts of its ``open``,
    ``high``, ``low``, ``close`` and ``volume`` columns are then recorded.

    Returns:
        A DataFrame indexed by ``instruments`` with one row per flagged
        instrument and the OHLCV null counts as columns, or ``None`` (after
        logging an info message) when no instrument is flagged.

    Raises:
        KeyError: If a flagged DataFrame lacks one of the OHLCV columns.
    """
    ohlcv_columns = ("open", "high", "low", "close", "volume")
    result_dict = {"instruments": []}
    result_dict.update({column: [] for column in ohlcv_columns})

    for filename, df in self.data.items():
        # Count nulls once per frame; the original recomputed this for
        # every comparison and every appended column.
        null_counts = df.isnull().sum()
        if not (null_counts > self.missing_data_num).any():
            continue
        result_dict["instruments"].append(filename)
        for column in ohlcv_columns:
            result_dict[column].append(null_counts[column])

    result_df = pd.DataFrame(result_dict).set_index("instruments")
    if result_df.empty:
        logger.info("✅ There are no missing data.")
        return None
    return result_df
97
98 def check_large_step_changes(self) -> Optional[pd.DataFrame]:
99 """Check if there are any large step changes above the threshold in the OHLCV columns."""

Callers 1

check_dataMethod · 0.95

Calls 3

tolistMethod · 0.80
sumMethod · 0.45
infoMethod · 0.45

Tested by

no test coverage detected