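calc paused num This method adds the `paused` and `paused_num` fields - `paused` is 1 on days treated as suspended (every checked field is NaN for the whole day, or volume is 0 for the whole day) and 0 otherwise. - The `paused_num` is 0 on suspended days; on other days it is the running count of consecutive non-suspended trading days.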
(df: pd.DataFrame, _date_field_name, _symbol_field_name)
| 795 | |
| 796 | |
def calc_paused_num(df: pd.DataFrame, _date_field_name, _symbol_field_name):
    """Flag suspended trading days and count consecutive active days.

    Rows are grouped by calendar date (derived from ``_date_field_name``).
    A day is treated as suspended when every checked field is NaN on all of
    its rows, or when ``volume`` equals 0 on all of its rows. Negative
    ``volume`` values are replaced with NaN (with a warning) before the check.

    Two columns are added to the returned frame:

    - ``paused``: 1 on suspended days, 0 otherwise.
    - ``paused_num``: 0 on suspended days; otherwise the running count of
      consecutive non-suspended days ending on that day (1 on the first
      active day, and again 1 on the first active day after a suspension).

    Suspended days that occur before the first active day or after the last
    active day are dropped from the result.

    Parameters
    ----------
    df : pd.DataFrame
        Input data; must contain ``volume``, ``_date_field_name`` and
        ``_symbol_field_name``. The input is not modified. Only the first
        row's symbol is read, and it is used in log messages only.
    _date_field_name : str
        Column holding values accepted by ``pd.Timestamp``.
    _symbol_field_name : str
        Column holding the instrument symbol.

    Returns
    -------
    pd.DataFrame
        The retained rows with ``paused`` and ``paused_num`` added, or an
        empty ``pd.DataFrame()`` when the input has no rows or contains no
        active day.
    """
    if df.empty:
        # There is no first row to read the symbol from; return the same
        # empty result as the "no active day" case instead of raising
        # IndexError from ``df.iloc[0]``.
        return pd.DataFrame()

    _symbol = df.iloc[0][_symbol_field_name]
    df = df.copy()
    df["_tmp_date"] = df[_date_field_name].apply(lambda x: pd.Timestamp(x).date())

    # Columns inspected by the all-NaN test: everything except bookkeeping and
    # identity columns. The column set is the same for every daily group, so
    # it is computed once here rather than inside the loop.
    check_fields = list(
        set(df.columns)
        - {
            "_tmp_date",
            "paused",
            "factor",
            _date_field_name,
            _symbol_field_name,
        }
    )

    # Daily frames kept so far; leading suspended days are never appended.
    all_data = []
    # Length of the current run of suspended days; after the loop this is the
    # number of trailing suspended days that must be trimmed.
    all_nan_nums = 0
    # Length of the current run of non-suspended days.
    not_nan_nums = 0
    for _date, _df in df.groupby("_tmp_date", group_keys=False):
        # Work on an explicit copy so the column assignments below never
        # write through to (or warn about) the grouped parent frame.
        _df = _df.copy()
        _df["paused"] = 0

        negative_volume = _df["volume"] < 0
        if negative_volume.any():
            logger.warning(f"volume < 0, will fill np.nan: {_date} {_symbol}")
            _df.loc[negative_volume, "volume"] = np.nan

        if _df[check_fields].isna().values.all() or (_df["volume"] == 0).all():
            all_nan_nums += 1
            not_nan_nums = 0
            _df["paused"] = 1
            # Only keep a suspended day once an active day has been seen.
            if all_data:
                _df["paused_num"] = not_nan_nums
                all_data.append(_df)
        else:
            all_nan_nums = 0
            not_nan_nums += 1
            _df["paused_num"] = not_nan_nums
            all_data.append(_df)

    # Drop the trailing run of suspended days.
    all_data = all_data[: len(all_data) - all_nan_nums]
    if not all_data:
        logger.warning(f"data is empty: {_symbol}")
        return pd.DataFrame()

    df = pd.concat(all_data, sort=False)
    del df["_tmp_date"]
    return df
| 845 | |
| 846 | |
| 847 | if __name__ == "__main__": |