A BigQuerySource object defines a data source that a BigQueryOfflineStore class can use.
| 21 | |
| 22 | @typechecked |
| 23 | class BigQuerySource(DataSource): |
| 24 | """A BigQuerySource object defines a data source that a BigQueryOfflineStore class can use.""" |
| 25 | |
def source_type(self) -> DataSourceProto.SourceType.ValueType:
    """Return the source-type enum value for this data source.

    Returns:
        ``DataSourceProto.BATCH_BIGQUERY``, unconditionally.
    """
    # The value is a constant; no instance state is consulted.
    bigquery_source_type = DataSourceProto.BATCH_BIGQUERY
    return bigquery_source_type
| 28 | |
| 29 | def __init__( |
| 30 | self, |
| 31 | *, |
| 32 | name: Optional[str] = None, |
| 33 | timestamp_field: Optional[str] = None, |
| 34 | table: Optional[str] = None, |
| 35 | created_timestamp_column: Optional[str] = "", |
| 36 | field_mapping: Optional[Dict[str, str]] = None, |
| 37 | date_partition_column: Optional[str] = None, |
| 38 | timestamp_field_type: Optional[str] = None, |
| 39 | query: Optional[str] = None, |
| 40 | description: Optional[str] = "", |
| 41 | tags: Optional[Dict[str, str]] = None, |
| 42 | owner: Optional[str] = "", |
| 43 | ): |
| 44 | """Create a BigQuerySource from an existing table or query. |
| 45 | |
| 46 | Args: |
| 47 | name (optional): Name for the source. Defaults to the table if not specified, in which |
| 48 | case the table must be specified. |
| 49 | timestamp_field (optional): Event timestamp field used for point in time |
| 50 | joins of feature values. |
| 51 | table (optional): BigQuery table where the features are stored. At least one of 'table' |
| 52 | and 'query' must be specified. When both are set, 'query' is used for reads and |
| 53 | 'table' is used as the write destination. |
| 54 | created_timestamp_column (optional): Timestamp column indicating when the row was created, used for deduplicating rows. |
| 55 | field_mapping (optional): A dictionary mapping of column names in this data source to feature names in a feature table |
| 56 | or view. Only used for feature columns, not entities or timestamp columns. |
| 57 | date_partition_column (optional): Timestamp column used for partitioning. |
| 58 | timestamp_field_type (optional): Type of the timestamp_field column. |
| 59 | Set to "DATE" when the event timestamp column is a DATE type, |
| 60 | so SQL generation uses date-only comparisons instead of TIMESTAMP(). |
| 61 | query (optional): The query to be executed to obtain the features. When both 'table' |
| 62 | and 'query' are provided, 'query' takes priority for reads. |
| 63 | description (optional): A human-readable description. |
| 64 | tags (optional): A dictionary of key-value pairs to store arbitrary metadata. |
| 65 | owner (optional): The owner of the BigQuery source, typically the email of the primary |
| 66 | maintainer. |
| 67 | Example: |
| 68 | >>> from feast import BigQuerySource |
| 69 | >>> my_bigquery_source = BigQuerySource(table="gcp_project:bq_dataset.bq_table") |
| 70 | """ |
| 71 | if table is None and query is None: |
| 72 | raise ValueError('No "table" or "query" argument provided.') |
| 73 | |
| 74 | self.bigquery_options = BigQueryOptions(table=table, query=query) |
| 75 | |
| 76 | # If no name, use the table as the default name. |
| 77 | if name is None and table is None: |
| 78 | raise DataSourceNoNameException() |
| 79 | name = name or table |
| 80 | assert name |
no outgoing calls