| 136 | |
| 137 | |
class CSVSource(DataSource):
    """Data source that loads a CSV file into a DuckDB table and queries it.

    On construction the file at ``config.path`` is read with DuckDB's
    ``read_csv_auto`` and materialised as a table with a random, unique
    name. Queries written against the source's logical ``name`` are
    rewritten to target that table.
    """

    def __init__(
        self, name: str, config: CSVConfig, duckdb_conn: duckdb.DuckDBPyConnection
    ):
        """Create the backing DuckDB table for the CSV file.

        Args:
            name: Logical name callers use to refer to this source in SQL.
            config: CSV configuration; only ``config.path`` is read here.
            duckdb_conn: DuckDB connection, handed to the base class.
        """
        super().__init__(name, duckdb_conn)
        self.path = config.path

        # Random suffix so several CSV sources can share one connection
        # without their table names colliding.
        self._table_name = f"csv_{uuid.uuid4().hex[:8]}"

        # The path is embedded in a single-quoted SQL string literal, so any
        # single quote in it must be doubled; otherwise a path such as
        # "o'brien.csv" terminates the literal early and breaks (or alters)
        # the statement.
        quoted_path = str(self.path).replace("'", "''")

        # Create a table in DuckDB for this CSV
        self._duckdb.execute(f"""
            CREATE TABLE {self._table_name} AS
            SELECT * FROM read_csv_auto('{quoted_path}',
                header=true,
                auto_detect=true,
                ignore_errors=true,
                normalize_names=false,
                sample_size=-1,
                all_varchar=true
            )
        """)

    def query(self, sql: str) -> pd.DataFrame:
        """Run ``sql`` against this source and return the result as a DataFrame.

        Occurrences of the source's logical name are replaced by the actual
        DuckDB table name before execution. Only whole identifiers are
        replaced, so a source called ``sales`` does not corrupt a column
        called ``sales_total``.

        Args:
            sql: SQL text that refers to this source by ``self.name``.

        Returns:
            The query result as a pandas DataFrame.
        """
        import re

        # An empty name would match at every identifier boundary; leave the
        # query untouched in that case.
        if self.name:
            # Lookarounds (rather than \b) so names that start or end with a
            # non-word character are still matched correctly.
            whole_name = rf"(?<!\w){re.escape(self.name)}(?!\w)"
            # Callable replacement keeps the table name literal (no
            # backslash/group-reference interpretation).
            sql = re.sub(whole_name, lambda _match: self._table_name, sql)
        return self._duckdb.execute(sql).df()

    def to_df(self) -> pd.DataFrame:
        """Get entire CSV as a DataFrame"""
        return self._duckdb.execute(f"SELECT * FROM {self._table_name}").df()
| 167 | |
| 168 | |
| 169 | class JSONSource(DataSource): |
no outgoing calls
no test coverage detected