MCPcopy
hub / github.com/microsoft/qlib / calc_paused_num

Function calc_paused_num

scripts/data_collector/utils.py:797–844  ·  view source on GitHub ↗

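calc paused num: this method adds the `paused` and `paused_num` fields. The docstring describes `paused_num` as "the number of consecutive days of trading suspension", but the code sets it to the running count of consecutive non-suspended trading days, and to 0 on suspended days.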

(df: pd.DataFrame, _date_field_name, _symbol_field_name)

Source from the content-addressed store, hash-verified

795
796
def calc_paused_num(df: pd.DataFrame, _date_field_name, _symbol_field_name):
    """Flag suspended trading days and number the active ones.

    Rows are grouped by calendar day (derived from ``_date_field_name``). A day
    is treated as suspended when every checked field is NaN on all of its rows,
    or when ``volume`` is 0 on all of its rows. Negative ``volume`` values are
    replaced with NaN (with a warning) before that check. The checked fields are
    all columns except the date column, the symbol column, ``factor``,
    ``paused`` and the temporary grouping column.

    Two columns are added to the returned frame:

    - ``paused``: 1 on suspended days, 0 otherwise.
    - ``paused_num``: 0 on suspended days; on active days, the 1-based position
      of the day inside the current run of consecutive active days (the counter
      restarts after every suspended day).

    Suspended days before the first active day and after the last active day
    are dropped from the result.

    Parameters
    ----------
    df : pd.DataFrame
        Input data; must contain ``_date_field_name``, ``_symbol_field_name``
        and ``volume`` columns. It is copied, not modified. The symbol of the
        first row is only used in log messages.
    _date_field_name : str
        Name of the column holding values accepted by ``pd.Timestamp``.
    _symbol_field_name : str
        Name of the column holding the instrument symbol.

    Returns
    -------
    pd.DataFrame
        The retained rows with ``paused`` and ``paused_num`` added, or an empty
        ``pd.DataFrame()`` (no columns) when the input has no rows or contains
        no active day.
    """
    if df.empty:
        # Without this guard `df.iloc[0]` below raises IndexError.
        logger.warning("data is empty: input DataFrame has no rows")
        return pd.DataFrame()

    _symbol = df.iloc[0][_symbol_field_name]
    df = df.copy()
    df["_tmp_date"] = df[_date_field_name].apply(lambda x: pd.Timestamp(x).date())

    # The set of fields to inspect is the same for every day, so build it once.
    ignored_fields = {
        "_tmp_date",
        "paused",
        "factor",
        _date_field_name,
        _symbol_field_name,
    }
    check_fields = [col for col in df.columns if col not in ignored_fields]

    # Leading suspended days are never appended; trailing ones are trimmed below.
    all_data = []
    # Length of the current run of suspended days (used to trim the tail).
    all_nan_nums = 0
    # Length of the current run of active days.
    not_nan_nums = 0
    for _date, _df in df.groupby("_tmp_date", group_keys=False):
        # Work on an independent copy so the column assignments below never
        # write through to (or warn about) the grouped parent frame.
        _df = _df.copy()
        _df["paused"] = 0
        negative_volume = _df["volume"] < 0
        if negative_volume.any():
            logger.warning(f"volume < 0, will fill np.nan: {_date} {_symbol}")
            _df.loc[negative_volume, "volume"] = np.nan

        if _df[check_fields].isna().values.all() or (_df["volume"] == 0).all():
            all_nan_nums += 1
            not_nan_nums = 0
            _df["paused"] = 1
            if all_data:
                # Only keep suspended days that follow at least one active day.
                _df["paused_num"] = not_nan_nums
                all_data.append(_df)
        else:
            all_nan_nums = 0
            not_nan_nums += 1
            _df["paused_num"] = not_nan_nums
            all_data.append(_df)

    # Drop the suspended days at the very end of the series.
    all_data = all_data[: len(all_data) - all_nan_nums]
    if not all_data:
        logger.warning(f"data is empty: {_symbol}")
        return pd.DataFrame()

    df = pd.concat(all_data, sort=False)
    del df["_tmp_date"]
    return df
845
846
847if __name__ == "__main__":

Callers 1

calc_adjusted_priceFunction · 0.85

Calls 5

copyMethod · 0.80
dateMethod · 0.80
isnaMethod · 0.80
applyMethod · 0.45
allMethod · 0.45

Tested by

no test coverage detected