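Creates a DataSource object. Args: name: Name of data source, which should be unique within a project. timestamp_field (optional): Event timestamp field used for point-in-time joins of feature values. created_timestamp_column (optional): Timestamp column indicating when the row was created, used for deduplicating rows. field_mapping (optional): A dictionary mapping of column names in this data source to feature names in a feature table or view. Only used for feature columns, not entity or timestamp columns. description (optional): A human-readable description. tags (optional): A dictionary of key-value pairs to store arbitrary metadata. owner (optional): The owner of the data source, typically the email of the primary maintainer. date_partition_column (optional): Timestamp column used for partitioning. Not supported by all stores. timestamp_field_type (optional): Type of the timestamp_field column. Defaults to "TIMESTAMP". Set to "DATE" when the event timestamp column is a DATE type, so SQL generation uses date-only comparisons.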
(
self,
*,
name: str,
timestamp_field: Optional[str] = None,
created_timestamp_column: Optional[str] = None,
field_mapping: Optional[Dict[str, str]] = None,
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
date_partition_column: Optional[str] = None,
timestamp_field_type: Optional[str] = None,
)
| 210 | last_updated_timestamp: Optional[datetime] |
| 211 | |
def __init__(
    self,
    *,
    name: str,
    timestamp_field: Optional[str] = None,
    created_timestamp_column: Optional[str] = None,
    field_mapping: Optional[Dict[str, str]] = None,
    description: Optional[str] = "",
    tags: Optional[Dict[str, str]] = None,
    owner: Optional[str] = "",
    date_partition_column: Optional[str] = None,
    timestamp_field_type: Optional[str] = None,
):
    """
    Initializes a DataSource from keyword-only arguments.

    Every optional argument that is omitted (or otherwise falsy) is stored in a
    normalized empty form: "" for string-valued fields and {} for dictionaries.
    Both ``created_timestamp`` and ``last_updated_timestamp`` are set to the same
    value returned by a single ``_utc_now()`` call.

    Args:
        name: Name of data source, which should be unique within a project.
        timestamp_field (optional): Event timestamp field used for point-in-time
            joins of feature values.
        created_timestamp_column (optional): Timestamp column indicating when the
            row was created, used for deduplicating rows.
        field_mapping (optional): A dictionary mapping of column names in this data
            source to feature names in a feature table or view. Only used for
            feature columns, not entity or timestamp columns.
        description (optional): A human-readable description.
        tags (optional): A dictionary of key-value pairs to store arbitrary
            metadata.
        owner (optional): The owner of the data source, typically the email of the
            primary maintainer.
        date_partition_column (optional): Timestamp column used for partitioning.
            Not supported by all stores.
        timestamp_field_type (optional): Type of the timestamp_field column. Set
            to "DATE" when the event timestamp column is a DATE type, so SQL
            generation uses date-only comparisons. Stored as "" when not given;
            presumably consumers treat that as "TIMESTAMP" (verify downstream).

    Raises:
        ValueError: If a non-empty timestamp_field names the same column as
            created_timestamp_column.
    """
    # Normalize the two timestamp column names up front so the conflict check
    # below compares exactly the values that end up on the instance.
    event_ts_column = timestamp_field if timestamp_field else ""
    created_ts_column = created_timestamp_column or ""

    self.name = name
    self.timestamp_field = event_ts_column
    self.created_timestamp_column = created_ts_column
    self.field_mapping = field_mapping or {}

    # An unset event timestamp ("") never conflicts, even though the created
    # timestamp column is also "" when unset.
    if event_ts_column and event_ts_column == created_ts_column:
        raise ValueError(
            "Please do not use the same column for 'timestamp_field' and 'created_timestamp_column'."
        )

    self.description = description if description else ""
    self.tags = tags if tags else {}
    self.owner = owner if owner else ""
    self.date_partition_column = date_partition_column or ""
    self.timestamp_field_type = timestamp_field_type or ""

    # One clock read, so both timestamps are identical on a fresh object.
    self.created_timestamp = self.last_updated_timestamp = _utc_now()
| 269 |