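Generate a Python file with Entity and FeatureView definitions. This file is compatible with the `feast apply` CLI. Args: repo_path: Path to the feature repo directory. feature_view_name: Name of the feature view to create. vector_length: Dimension of the embedding vectors. Should match the output dimension of the embedding model being used. Defaults to 384 (matching the default all-MiniLM-L6-v2 model). Returns: Path to generated file.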
(
repo_path: str,
feature_view_name: str = "text_feature_view",
vector_length: int = 384,
)
| 54 | |
| 55 | |
| 56 | def generate_repo_file( |
| 57 | repo_path: str, |
| 58 | feature_view_name: str = "text_feature_view", |
| 59 | vector_length: int = 384, |
| 60 | ) -> str: |
| 61 | """ |
| 62 | Generate a Python file with Entity and FeatureView definitions. |
| 63 | |
| 64 | This file is compatible with `feast apply` CLI. |
| 65 | |
| 66 | Args: |
| 67 | repo_path: Path to the feature repo directory. |
| 68 | feature_view_name: Name of the feature view to create. |
| 69 | vector_length: Dimension of the embedding vectors. Should match the |
| 70 | output dimension of the embedding model being used. Defaults to |
| 71 | 384 (matching the default all-MiniLM-L6-v2 model). |
| 72 | |
| 73 | Returns: |
| 74 | Path to generated file. |
| 75 | """ |
| 76 | from feast.repo_operations import is_valid_name |
| 77 | |
| 78 | if not is_valid_name(feature_view_name) or not feature_view_name.isidentifier(): |
| 79 | raise ValueError( |
| 80 | f"feature_view_name '{feature_view_name}' is invalid. " |
| 81 | "It should only contain alphanumeric characters, underscores, " |
| 82 | "and must not start with an underscore." |
| 83 | ) |
| 84 | code = f'''""" |
| 85 | Auto-generated by DocEmbedder. |
| 86 | Compatible with `feast apply` CLI. |
| 87 | """ |
| 88 | from datetime import timedelta |
| 89 | |
| 90 | from feast import Entity, FeatureView, Field, FileSource |
| 91 | from feast.types import Array, Float32, String, ValueType |
| 92 | |
| 93 | |
| 94 | # Entity |
| 95 | text_entity = Entity( |
| 96 | name="passage_id", |
| 97 | join_keys=["passage_id"], |
| 98 | description="Passage identifier", |
| 99 | value_type=ValueType.STRING, |
| 100 | ) |
| 101 | |
| 102 | # Source |
| 103 | {feature_view_name.replace(" ", "_").replace("-", "_")}_source = FileSource( |
| 104 | name="{feature_view_name}_source", |
| 105 | path="data/{feature_view_name}.parquet", |
| 106 | timestamp_field="event_timestamp", |
| 107 | ) |
| 108 | |
| 109 | # FeatureView |
| 110 | {feature_view_name.replace(" ", "_").replace("-", "_")} = FeatureView( |
| 111 | name="{feature_view_name}", |
| 112 | entities=[text_entity], |
| 113 | ttl=timedelta(days=1), |
no test coverage detected