Manages access to xcapture CSV files via DuckDB
| 13 | |
| 14 | |
| 15 | class XCaptureDataSource: |
| 16 | """Manages access to xcapture CSV files via DuckDB""" |
| 17 | |
| 18 | def __init__(self, datadir: str, duckdb_threads: Optional[int] = None): |
| 19 | """ |
| 20 | Initialize data source with directory containing CSV files. |
| 21 | |
| 22 | Args: |
| 23 | datadir: Directory containing CSV files |
| 24 | duckdb_threads: Number of DuckDB threads (None for default, 1 for deterministic) |
| 25 | """ |
| 26 | self.datadir = Path(datadir) |
| 27 | self.conn = None |
| 28 | self.duckdb_threads = duckdb_threads |
| 29 | self.available_columns = {} # Lowercase -> actual column name mapping |
| 30 | self.csv_metadata = {} |
| 31 | self.schema_info: Dict[str, List[Tuple[str, str]]] = {} |
| 32 | self.csv_filter = CSVTimeFilter(self.datadir) |
| 33 | |
| 34 | # Validate datadir exists |
| 35 | if not self.datadir.exists(): |
| 36 | raise ValueError(f"Data directory does not exist: {datadir}") |
| 37 | |
def connect(self):
    """Return the shared DuckDB connection, opening it on first use.

    The connection is an in-memory database. When ``duckdb_threads`` is
    set, the thread count is configured once, right after the connection
    is opened.
    """
    # Reuse the connection opened by an earlier call.
    if self.conn is not None:
        return self.conn

    self.conn = duckdb.connect(':memory:')
    if self.duckdb_threads is None:
        return self.conn

    # An explicit thread count was requested; apply it to the new connection.
    self.conn.execute(f"SET threads TO {self.duckdb_threads}")
    return self.conn
| 46 | |
def close(self):
    """Close the DuckDB connection, if one is open, and forget it."""
    # Nothing to release when no connection is held.
    if not self.conn:
        return

    self.conn.close()
    self.conn = None
| 52 | |
| 53 | def discover_columns(self) -> Dict[str, Dict[str, str]]: |
| 54 | """ |
| 55 | Discover available columns from all CSV types. |
| 56 | Returns dict: {csv_type: {COLUMN_UPPER: actual_column_name}} |
| 57 | """ |
| 58 | if self.available_columns: |
| 59 | return self.available_columns |
| 60 | |
| 61 | conn = self.connect() |
| 62 | |
| 63 | # CSV file patterns in priority order |
| 64 | csv_patterns = { |
| 65 | 'samples': 'xcapture_samples_*.csv', |
| 66 | 'syscend': 'xcapture_syscend_*.csv', |
| 67 | 'iorqend': 'xcapture_iorqend_*.csv', |
| 68 | 'kstacks': 'xcapture_kstacks_*.csv', |
| 69 | 'ustacks': 'xcapture_ustacks_*.csv' |
| 70 | } |
| 71 | |
| 72 | for csv_type, pattern in csv_patterns.items(): |
no outgoing calls