Example df generated by this function: | event_timestamp | driver_id | conv_rate | acc_rate | avg_daily_trips | created | |------------------+-----------+-----------+----------+-----------------+------------------| | 2021-03-17 19:31 | 5010 | 0.229297 | 0.685843 | 861 | 2021-03-24 19:34 |
(drivers, start_date, end_date)
| 86 | |
| 87 | |
| 88 | def create_driver_hourly_stats_df(drivers, start_date, end_date) -> pd.DataFrame: |
| 89 | """ |
| 90 | Example df generated by this function: |
| 91 | |
| 92 | | event_timestamp | driver_id | conv_rate | acc_rate | avg_daily_trips | created | |
| 93 | |------------------+-----------+-----------+----------+-----------------+------------------| |
| 94 | | 2021-03-17 19:31 | 5010 | 0.229297 | 0.685843 | 861 | 2021-03-24 19:34 | |
| 95 | | 2021-03-17 20:31 | 5010 | 0.781655 | 0.861280 | 769 | 2021-03-24 19:34 | |
| 96 | | 2021-03-17 21:31 | 5010 | 0.150333 | 0.525581 | 778 | 2021-03-24 19:34 | |
| 97 | | 2021-03-17 22:31 | 5010 | 0.951701 | 0.228883 | 570 | 2021-03-24 19:34 | |
| 98 | | 2021-03-17 23:31 | 5010 | 0.819598 | 0.262503 | 473 | 2021-03-24 19:34 | |
| 99 | | | ... | ... | ... | ... | | |
| 100 | | 2021-03-24 16:31 | 5001 | 0.061585 | 0.658140 | 477 | 2021-03-24 19:34 | |
| 101 | | 2021-03-24 17:31 | 5001 | 0.088949 | 0.303897 | 618 | 2021-03-24 19:34 | |
| 102 | | 2021-03-24 18:31 | 5001 | 0.096652 | 0.747421 | 480 | 2021-03-24 19:34 | |
| 103 | | 2021-03-17 19:31 | 5005 | 0.142936 | 0.707596 | 466 | 2021-03-24 19:34 | |
| 104 | | 2021-03-17 19:31 | 5005 | 0.142936 | 0.707596 | 466 | 2021-03-24 19:34 | |
| 105 | """ |
| 106 | df_hourly = pd.DataFrame( |
| 107 | { |
| 108 | "event_timestamp": [ |
| 109 | pd.Timestamp(dt, unit="ms").round("ms") |
| 110 | for dt in pd.date_range( |
| 111 | start=start_date, |
| 112 | end=end_date, |
| 113 | freq="1h", |
| 114 | inclusive="left", |
| 115 | tz="UTC", |
| 116 | ) |
| 117 | ] |
| 118 | # include a fixed timestamp for get_historical_features in the quickstart |
| 119 | + [ |
| 120 | pd.Timestamp( |
| 121 | year=2021, month=4, day=12, hour=7, minute=0, second=0, tz="UTC" |
| 122 | ) |
| 123 | ] |
| 124 | } |
| 125 | ) |
| 126 | df_all_drivers = pd.DataFrame() |
| 127 | |
| 128 | for driver in drivers: |
| 129 | df_hourly_copy = df_hourly.copy() |
| 130 | df_hourly_copy["driver_id"] = driver |
| 131 | df_all_drivers = pd.concat([df_hourly_copy, df_all_drivers]) |
| 132 | |
| 133 | df_all_drivers.reset_index(drop=True, inplace=True) |
| 134 | rows = df_all_drivers["event_timestamp"].count() |
| 135 | |
| 136 | df_all_drivers["conv_rate"] = np.random.random(size=rows).astype(np.float32) |
| 137 | df_all_drivers["acc_rate"] = np.random.random(size=rows).astype(np.float32) |
| 138 | df_all_drivers["avg_daily_trips"] = np.random.randint(0, 1000, size=rows).astype( |
| 139 | np.int64 |
| 140 | ) |
| 141 | df_all_drivers["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms")) |
| 142 | |
| 143 | # Complex type columns for Map, Json, and Struct examples |
| 144 | import json as _json |
| 145 |
no test coverage detected