Example df generated by this function: | event_timestamp | customer_id | current_balance | avg_passenger_count | lifetime_trip_count | created | |------------------+-------------+-----------------+---------------------+---------------------+------------------| | 2021-03-1
(customers, start_date, end_date)
| 179 | |
| 180 | |
def create_customer_daily_profile_df(customers, start_date, end_date) -> pd.DataFrame:
    """
    Build a randomly populated daily profile frame: one row per customer per day.

    Example df generated by this function:

    | event_timestamp  | customer_id | current_balance | avg_passenger_count | lifetime_trip_count | created          |
    |------------------+-------------+-----------------+---------------------+---------------------+------------------|
    | 2021-03-17 19:31 |        1010 |        0.889188 |            0.049057 |                 412 | 2021-03-24 19:38 |
    | 2021-03-18 19:31 |        1010 |        0.979273 |            0.212630 |                 639 | 2021-03-24 19:38 |
    | 2021-03-19 19:31 |        1010 |        0.976549 |            0.176881 |                  70 | 2021-03-24 19:38 |
    | 2021-03-20 19:31 |        1010 |        0.273697 |            0.325012 |                  68 | 2021-03-24 19:38 |
    | 2021-03-21 19:31 |        1010 |        0.438262 |            0.313009 |                 192 | 2021-03-24 19:38 |
    |                  |         ... |             ... |                 ... |                 ... |                  |
    | 2021-03-19 19:31 |        1001 |        0.738860 |            0.857422 |                 344 | 2021-03-24 19:38 |
    | 2021-03-20 19:31 |        1001 |        0.848397 |            0.745989 |                 106 | 2021-03-24 19:38 |
    | 2021-03-21 19:31 |        1001 |        0.301552 |            0.185873 |                 812 | 2021-03-24 19:38 |
    | 2021-03-22 19:31 |        1001 |        0.943030 |            0.561219 |                 322 | 2021-03-24 19:38 |
    | 2021-03-23 19:31 |        1001 |        0.354919 |            0.810093 |                 273 | 2021-03-24 19:38 |

    Args:
        customers: Iterable of customer ids; each id gets one row per day.
            May be empty, in which case an empty frame with the full set of
            columns is returned.
        start_date: First day of the range (passed to ``pd.date_range``).
        end_date: End of the range; excluded because the range is built with
            ``inclusive="left"``.

    Returns:
        A DataFrame with columns ``event_timestamp`` (UTC, daily),
        ``customer_id``, ``current_balance`` (float32),
        ``avg_passenger_count`` (float32), ``lifetime_trip_count`` (int32)
        and ``created``. Customers appear in reverse iteration order (the
        last customer's rows come first), each block sorted by day.
    """
    df_daily = pd.DataFrame(
        {
            "event_timestamp": [
                pd.Timestamp(dt, unit="ms").round("ms")
                for dt in pd.date_range(
                    start=start_date,
                    end=end_date,
                    freq="1D",
                    inclusive="left",
                    tz="UTC",
                )
            ]
        }
    )

    # Build every per-customer frame first and concatenate once. Iterating in
    # reverse keeps the historical row order, where each new customer was
    # prepended to the accumulated frame.
    per_customer_frames = [
        df_daily.assign(customer_id=customer)
        for customer in reversed(list(customers))
    ]
    if per_customer_frames:
        df_all_customers = pd.concat(per_customer_frames, ignore_index=True)
    else:
        # No customers: keep the schema so the column assignments below (and
        # callers) still work. int64 matches the integer ids in the example
        # above.
        df_all_customers = df_daily.iloc[:0].assign(
            customer_id=pd.Series(dtype="int64")
        )

    rows = len(df_all_customers)

    df_all_customers["current_balance"] = np.random.random(size=rows).astype(np.float32)
    df_all_customers["avg_passenger_count"] = np.random.random(size=rows).astype(
        np.float32
    )
    df_all_customers["lifetime_trip_count"] = np.random.randint(
        0, 1000, size=rows
    ).astype(np.int32)

    # TODO: Remove created timestamp in order to test whether its really optional
    df_all_customers["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms"))
    return df_all_customers
| 235 | |
| 236 | |
| 237 | def create_location_stats_df(locations, start_date, end_date) -> pd.DataFrame: |