MCPcopy Index your code
hub / github.com/feast-dev/feast / bootstrap

Function bootstrap

sdk/python/feast/templates/ray/bootstrap.py:4–89  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

2
3
4def bootstrap():
5 import pathlib
6 from datetime import datetime, timedelta
7
8 import numpy as np
9 import pandas as pd
10
11 from feast.driver_test_data import create_driver_hourly_stats_df
12
13 repo_path = pathlib.Path(__file__).parent.absolute() / "feature_repo"
14 project_name = pathlib.Path(__file__).parent.absolute().name
15 data_path = repo_path / "data"
16 data_path.mkdir(exist_ok=True)
17
18 end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
19 start_date = end_date - timedelta(days=15)
20
21 # Generate driver data using Feast's built-in test data generator
22 driver_entities = [1001, 1002, 1003, 1004, 1005]
23 driver_df = create_driver_hourly_stats_df(driver_entities, start_date, end_date)
24
25 if driver_df["event_timestamp"].dt.tz is None:
26 driver_df["event_timestamp"] = driver_df["event_timestamp"].dt.tz_localize(
27 "UTC"
28 )
29 if "created" in driver_df.columns and driver_df["created"].dt.tz is None:
30 driver_df["created"] = driver_df["created"].dt.tz_localize("UTC")
31
32 driver_stats_path = data_path / "driver_stats.parquet"
33 driver_df.to_parquet(path=str(driver_stats_path), allow_truncated_timestamps=True)
34
35 # Generate customer data to demonstrate Ray's multi-source capabilities
36 customer_entities = [2001, 2002, 2003, 2004, 2005]
37
38 # Create customer daily profile data
39 customer_data = []
40 for customer_id in customer_entities:
41 for i, single_date in enumerate(
42 pd.date_range(start_date, end_date, freq="D", tz="UTC")
43 ):
44 stable_timestamp = single_date.replace(
45 hour=12, minute=0, second=0, microsecond=0
46 )
47 customer_data.append(
48 {
49 "customer_id": customer_id,
50 "event_timestamp": stable_timestamp,
51 "created": stable_timestamp + timedelta(minutes=10),
52 "current_balance": np.random.uniform(10.0, 1000.0),
53 "avg_passenger_count": np.random.uniform(1.0, 4.0),
54 "lifetime_trip_count": np.random.randint(50, 500),
55 }
56 )
57
58 customer_df = pd.DataFrame(customer_data)
59
60 if customer_df["event_timestamp"].dt.tz is None:
61 customer_df["event_timestamp"] = customer_df["event_timestamp"].dt.tz_localize(

Callers 1

bootstrap.py · File · 0.70

Calls 2

replace_str_in_file · Function · 0.90

Tested by

no test coverage detected