A FileSource object defines a data source that a DaskOfflineStore or DuckDBOfflineStore class can use.
| 30 | |
| 31 | @typechecked |
| 32 | class FileSource(DataSource): |
| 33 | """A FileSource object defines a data source that a DaskOfflineStore or DuckDBOfflineStore class can use.""" |
| 34 | |
def source_type(self) -> DataSourceProto.SourceType.ValueType:
    """Return the protobuf source-type value used for file sources.

    Returns:
        The ``DataSourceProto.BATCH_FILE`` enum value.
    """
    batch_file_kind = DataSourceProto.BATCH_FILE
    return batch_file_kind
| 37 | |
| 38 | def __init__( |
| 39 | self, |
| 40 | *, |
| 41 | path: str, |
| 42 | name: Optional[str] = "", |
| 43 | event_timestamp_column: Optional[str] = "", |
| 44 | file_format: Optional[FileFormat] = None, |
| 45 | created_timestamp_column: Optional[str] = "", |
| 46 | field_mapping: Optional[Dict[str, str]] = None, |
| 47 | s3_endpoint_override: Optional[str] = None, |
| 48 | description: Optional[str] = "", |
| 49 | tags: Optional[Dict[str, str]] = None, |
| 50 | owner: Optional[str] = "", |
| 51 | timestamp_field: Optional[str] = "", |
| 52 | ): |
| 53 | """ |
| 54 | Creates a FileSource object. |
| 55 | |
| 56 | Args: |
| 57 | path: File path to file containing feature data. Must contain an event_timestamp column, entity columns and |
| 58 | feature columns. |
| 59 | name (optional): Name for the file source. Defaults to the path. |
| 60 | event_timestamp_column (optional): (Deprecated in favor of timestamp_field) Event |
| 61 | timestamp column used for point in time joins of feature values. |
| 62 | created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows. |
| 63 | file_format (optional): Explicitly set the file format. Allows Feast to bypass inferring the file format. |
| 64 | field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table |
| 65 | or view. Only used for feature columns, not entities or timestamp columns. |
| 66 | s3_endpoint_override (optional): Overrides AWS S3 endpoint with custom S3 storage |
| 67 | description (optional): A human-readable description. |
| 68 | tags (optional): A dictionary of key-value pairs to store arbitrary metadata. |
| 69 | owner (optional): The owner of the file source, typically the email of the primary |
| 70 | maintainer. |
| 71 | timestamp_field (optional): Event timestamp field used for point in time |
| 72 | joins of feature values. |
| 73 | |
| 74 | Examples: |
| 75 | >>> from feast import FileSource |
| 76 | >>> file_source = FileSource(path="my_features.parquet", timestamp_field="event_timestamp") |
| 77 | """ |
| 78 | self.file_options = FileOptions( |
| 79 | file_format=file_format, |
| 80 | uri=path, |
| 81 | s3_endpoint_override=s3_endpoint_override, |
| 82 | ) |
| 83 | |
| 84 | super().__init__( |
| 85 | name=name if name else path, |
| 86 | timestamp_field=timestamp_field, |
| 87 | created_timestamp_column=created_timestamp_column, |
| 88 | field_mapping=field_mapping, |
| 89 | description=description, |
no outgoing calls