(info: dict[str, Any], yaml_abs: str)
| 327 | |
| 328 | |
| 329 | def _nb_historical(info: dict[str, Any], yaml_abs: str) -> dict[str, Any]: |
| 330 | project = info["project"] |
| 331 | |
| 332 | cells: list[dict[str, Any]] = [ |
| 333 | _md( |
| 334 | f"# Historical Features & Training Datasets — `{project}`\n\n" |
| 335 | "Retrieve point-in-time correct feature values to build ML training datasets." |
| 336 | ), |
| 337 | _md("## 1. Feature Store Path"), |
| 338 | _path_setup_cell(yaml_abs), |
| 339 | _md("## 2. Connect to the Feature Store"), |
| 340 | _code( |
| 341 | "from feast import FeatureStore\n" |
| 342 | "\n" |
| 343 | "store = FeatureStore(fs_yaml_file=FEAST_FS_YAML)\n" |
| 344 | "print(f'Project : {store.project}')\n" |
| 345 | "print('Feature views:', [fv.name for fv in store.list_feature_views()])" |
| 346 | ), |
| 347 | _md( |
| 348 | "## 3. Discover Available Features\n\nList feature views and read a sample of entity data." |
| 349 | ), |
| 350 | _code( |
| 351 | "import pandas as pd\n" |
| 352 | "from datetime import datetime, timedelta, timezone\n" |
| 353 | "\n" |
| 354 | "fvs = store.list_feature_views()\n" |
| 355 | "entities = store.list_entities()\n" |
| 356 | "\n" |
| 357 | "if not fvs:\n" |
| 358 | " print('No feature views found — run `feast apply` first.')\n" |
| 359 | "else:\n" |
| 360 | " first_fv = fvs[0]\n" |
| 361 | "\n" |
| 362 | " # Identify the entity join key.\n" |
| 363 | " entity_name = entities[0].join_key if entities else 'entity_id'\n" |
| 364 | " if first_fv.entities:\n" |
| 365 | " fv_entity = next(\n" |
| 366 | " (e for e in entities if e.name in set(first_fv.entities)),\n" |
| 367 | " entities[0] if entities else None,\n" |
| 368 | " )\n" |
| 369 | " if fv_entity:\n" |
| 370 | " entity_name = fv_entity.join_key\n" |
| 371 | "\n" |
| 372 | " # Read latest entity values from the offline store.\n" |
| 373 | " # This uses the same mechanism Feast uses for materialization.\n" |
| 374 | " source = first_fv.batch_source\n" |
| 375 | " provider = store._get_provider()\n" |
| 376 | " sample_df = provider.offline_store.pull_latest_from_table_or_query(\n" |
| 377 | " config=store.config,\n" |
| 378 | " data_source=source,\n" |
| 379 | " join_key_columns=[entity_name],\n" |
| 380 | " feature_name_columns=[f.name for f in first_fv.features],\n" |
| 381 | " timestamp_field=source.timestamp_field,\n" |
| 382 | " created_timestamp_column=source.created_timestamp_column or '',\n" |
| 383 | " start_date=datetime(2000, 1, 1, tzinfo=timezone.utc),\n" |
| 384 | " end_date=datetime.now(tz=timezone.utc),\n" |
| 385 | " ).to_df()\n" |
| 386 | "\n" |
no test coverage detected