(
self,
repo_path: str,
yaml_file: str = "feature_store.yaml",
feature_view_name: str = "text_feature_view",
chunker: Optional[BaseChunker] = None,
embedder: Optional[BaseEmbedder] = None,
schema_transform_fn: SchemaTransformFn = default_schema_transform_fn,
create_feature_view: bool = True,
vector_length: Optional[int] = None,
auto_apply_repo: bool = True,
)
| 166 | """ |
| 167 | |
def __init__(
    self,
    repo_path: str,
    yaml_file: str = "feature_store.yaml",
    feature_view_name: str = "text_feature_view",
    chunker: Optional[BaseChunker] = None,
    embedder: Optional[BaseEmbedder] = None,
    schema_transform_fn: SchemaTransformFn = default_schema_transform_fn,
    create_feature_view: bool = True,
    vector_length: Optional[int] = None,
    auto_apply_repo: bool = True,
):
    """Configure the embedder and optionally generate/apply the repo.

    Args:
        repo_path: Path to the feature repository. Stored as given on
            ``self.repo_path``; its resolved form is used to build
            ``self.yaml_path``.
        yaml_file: File name joined onto the resolved ``repo_path`` to
            form ``self.yaml_path``.
        feature_view_name: Name stored on the instance and passed to
            ``generate_repo_file``.
        chunker: Chunker to use. A ``TextChunker()`` is created only
            when this is ``None``.
        embedder: Embedder to use. A ``MultiModalEmbedder()`` is created
            only when this is ``None``.
        schema_transform_fn: Callable taking exactly one parameter
            annotated as ``pd.DataFrame`` and annotated as returning
            ``pd.DataFrame``.
        create_feature_view: When true, ``generate_repo_file`` is called
            with the resolved vector length.
        vector_length: Explicit vector length forwarded to
            ``self._resolve_vector_length`` (with modality ``"text"``).
        auto_apply_repo: When true, ``self.apply_repo()`` is called at
            the end of construction.

    Raises:
        ValueError: If ``schema_transform_fn`` does not have the
            single-DataFrame-in / DataFrame-out annotated signature.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level ``typing`` import list.
    from typing import get_type_hints

    self.repo_path = repo_path
    self.yaml_path = os.path.join(Path(repo_path).resolve(), yaml_file)
    self.feature_view_name = feature_view_name
    # Compare against None explicitly: a caller-supplied object that is
    # merely falsy (e.g. defines __len__/__bool__) must not be replaced
    # by the default implementation.
    self.chunker = chunker if chunker is not None else TextChunker()
    self.embedder = embedder if embedder is not None else MultiModalEmbedder()
    self.store: Optional[FeatureStore] = None

    signature = inspect.signature(schema_transform_fn)
    parameters = list(signature.parameters.values())

    # Raw annotations are plain strings when the defining module uses
    # ``from __future__ import annotations`` (or quotes the type), so also
    # look at the resolved hints. Resolution is best effort: if it fails
    # for any reason we fall back to the raw-annotation comparison, and an
    # unsuitable function still ends in the ValueError below.
    try:
        resolved_hints = get_type_hints(schema_transform_fn)
    except Exception:
        resolved_hints = {}

    def _is_dataframe(raw_annotation, hint_key):
        # Accept either the raw annotation or its resolved form so that
        # everything the raw comparison accepted is still accepted.
        return (
            raw_annotation == pd.DataFrame
            or resolved_hints.get(hint_key) == pd.DataFrame
        )

    has_valid_signature = (
        len(parameters) == 1
        and _is_dataframe(parameters[0].annotation, parameters[0].name)
        and _is_dataframe(signature.return_annotation, "return")
    )
    if not has_valid_signature:
        raise ValueError(
            "schema_transform_fn must be a function that takes a DataFrame and returns a DataFrame"
        )
    self.schema_transform_fn = schema_transform_fn

    if create_feature_view:
        resolved_vector_length = self._resolve_vector_length(vector_length, "text")
        generate_repo_file(
            repo_path=repo_path,
            feature_view_name=feature_view_name,
            vector_length=resolved_vector_length,
        )
    # NOTE(review): treated as independent of ``create_feature_view``; the
    # listing this was reconstructed from had no indentation — confirm the
    # nesting against the real file.
    if auto_apply_repo:
        self.apply_repo()
| 207 | |
| 208 | def _resolve_vector_length( |
| 209 | self, explicit_length: Optional[int], modality: str |
nothing calls this directly
no test coverage detected