Example df generated by this function: | event_timestamp | location_id | temperature | created | +------------------+-------------+-------------+------------------+ | 2021-03-17 19:31 | 1 | 74 | 2021-03-24 19:38 | | 2021-03-17 20:31 | 24 |
(locations, start_date, end_date)
| 235 | |
| 236 | |
def create_location_stats_df(locations, start_date, end_date) -> pd.DataFrame:
    """
    Build an hourly temperature frame with one row per (hour, location).

    Hours run from ``start_date`` (inclusive) to ``end_date`` (exclusive) in
    UTC. Every location gets the same set of hourly timestamps, a random
    ``int32`` temperature in ``[50, 100)`` and a shared ``created`` timestamp
    (timezone-naive "now", rounded to milliseconds).

    Rows are grouped by location, with later entries of ``locations`` first.
    An empty ``locations`` yields an empty frame that still has all four
    columns.

    Example df generated by this function:

    | event_timestamp  | location_id | temperature | created          |
    +------------------+-------------+-------------+------------------+
    | 2021-03-17 19:31 | 1           | 74          | 2021-03-24 19:38 |
    | 2021-03-17 20:31 | 24          | 63          | 2021-03-24 19:38 |
    | 2021-03-17 21:31 | 19          | 65          | 2021-03-24 19:38 |
    | 2021-03-17 22:31 | 35          | 86          | 2021-03-24 19:38 |
    """
    df_hourly = pd.DataFrame(
        {
            "event_timestamp": [
                pd.Timestamp(dt, unit="ms").round("ms")
                for dt in pd.date_range(
                    start=start_date,
                    end=end_date,
                    freq="1h",
                    inclusive="left",
                    tz="UTC",
                )
            ]
        }
    )

    # One copy of the hourly frame per location, tagged with its id.
    per_location_frames = []
    for location in locations:
        df_location = df_hourly.copy()
        df_location["location_id"] = location
        per_location_frames.append(df_location)

    if per_location_frames:
        # Later locations come first to keep the historical row order (each
        # new frame used to be prepended). A single concat avoids re-copying
        # the accumulated frame on every iteration.
        per_location_frames.reverse()
        df_all_locations = pd.concat(per_location_frames, ignore_index=True)
    else:
        # No locations: return an empty, correctly-shaped frame instead of
        # failing on a missing "event_timestamp" column.
        df_all_locations = df_hourly.iloc[:0].copy()
        df_all_locations["location_id"] = pd.Series(dtype="int64")

    rows = len(df_all_locations)

    df_all_locations["temperature"] = np.random.randint(50, 100, size=rows).astype(
        np.int32
    )

    # TODO: Remove created timestamp in order to test whether its really optional
    df_all_locations["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms"))
    return df_all_locations
| 279 | |
| 280 | |
| 281 | def create_global_daily_stats_df(start_date, end_date) -> pd.DataFrame: |