MCPcopy
hub / github.com/StructuredLabs/preswald / CSVSource

Class CSVSource

preswald/engine/managers/data.py:138–166  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

136
137
class CSVSource(DataSource):
    """Data source that loads a CSV file into a DuckDB table.

    On construction the file at ``config.path`` is read with DuckDB's
    ``read_csv_auto`` and materialized as a table with a randomly generated
    name. Queries written against the source's logical name are rewritten to
    target that table.
    """

    def __init__(
        self, name: str, config: CSVConfig, duckdb_conn: duckdb.DuckDBPyConnection
    ):
        """Create the backing DuckDB table for the CSV file.

        Args:
            name: Logical source name that callers use in their SQL.
            config: CSV configuration; only ``config.path`` is read here.
            duckdb_conn: DuckDB connection passed through to the base class.
        """
        # NOTE(review): self.name and self._duckdb are presumably set by
        # DataSource.__init__ -- confirm against the base class.
        super().__init__(name, duckdb_conn)
        self.path = config.path

        # Create a table in DuckDB for this CSV
        self._table_name = f"csv_{uuid.uuid4().hex[:8]}"

        # The path is embedded in a single-quoted SQL string literal, so any
        # single quote in it must be doubled; otherwise a path such as
        # "bob's data.csv" would terminate the literal early and break (or
        # inject into) the statement.
        escaped_path = str(self.path).replace("'", "''")
        self._duckdb.execute(f"""
            CREATE TABLE {self._table_name} AS
            SELECT * FROM read_csv_auto('{escaped_path}',
                header=true,
                auto_detect=true,
                ignore_errors=true,
                normalize_names=false,
                sample_size=-1,
                all_varchar=true
            )
        """)

    def query(self, sql: str) -> pd.DataFrame:
        """Run ``sql`` against this source and return the result.

        Occurrences of the source name in ``sql`` are replaced with the
        backing table name before execution. Only whole-identifier matches
        are replaced, so a source called ``data`` does not corrupt tokens
        such as ``metadata`` or ``data_id``.

        Args:
            sql: SQL text that refers to this source by its logical name.

        Returns:
            The query result as a pandas DataFrame.
        """
        import re  # local import: the file's import block is not visible here

        # An empty name would match at every position; leave the SQL as-is.
        if self.name:
            pattern = rf"(?<!\w){re.escape(self.name)}(?!\w)"
            table_name = self._table_name
            # A callable replacement avoids backslash/group interpretation.
            sql = re.sub(pattern, lambda _match: table_name, sql)
        return self._duckdb.execute(sql).df()

    def to_df(self) -> pd.DataFrame:
        """Get entire CSV as a DataFrame"""
        return self._duckdb.execute(f"SELECT * FROM {self._table_name}").df()
167
168
169class JSONSource(DataSource):

Callers 2

connectMethod · 0.85
_get_or_create_sourceMethod · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected