MCPcopy Index your code
hub / github.com/tanelpoder/0xtools / XCaptureDataSource

Class XCaptureDataSource

xtop/core/data_source.py:15–285  ·  view source on GitHub ↗

Manages access to xcapture CSV files via DuckDB

Source from the content-addressed store, hash-verified

13
14
15class XCaptureDataSource:
16 """Manages access to xcapture CSV files via DuckDB"""
17
18 def __init__(self, datadir: str, duckdb_threads: Optional[int] = None):
19 """
20 Initialize data source with directory containing CSV files.
21
22 Args:
23 datadir: Directory containing CSV files
24 duckdb_threads: Number of DuckDB threads (None for default, 1 for deterministic)
25 """
26 self.datadir = Path(datadir)
27 self.conn = None
28 self.duckdb_threads = duckdb_threads
29 self.available_columns = {} # Lowercase -> actual column name mapping
30 self.csv_metadata = {}
31 self.schema_info: Dict[str, List[Tuple[str, str]]] = {}
32 self.csv_filter = CSVTimeFilter(self.datadir)
33
34 # Validate datadir exists
35 if not self.datadir.exists():
36 raise ValueError(f"Data directory does not exist: {datadir}")
37
def connect(self):
    """Return the DuckDB connection, opening an in-memory one on first use."""
    # Reuse the existing connection when there is one
    if self.conn is not None:
        return self.conn

    self.conn = duckdb.connect(':memory:')

    # Apply the thread-count setting only when the caller asked for one
    thread_count = self.duckdb_threads
    if thread_count is not None:
        self.conn.execute(f"SET threads TO {thread_count}")

    return self.conn
46
def close(self):
    """
    Close the DuckDB connection, if one is open.

    Calling this with no open connection is a no-op. The stored handle is
    always cleared, so a later connect() opens a fresh connection.
    """
    active = self.conn
    # Identity check (not truthiness), consistent with connect()
    if active is None:
        return
    try:
        active.close()
    finally:
        # Drop the reference even if close() raises
        self.conn = None
52
53 def discover_columns(self) -> Dict[str, Dict[str, str]]:
54 """
55 Discover available columns from all CSV types.
56 Returns dict: {csv_type: {COLUMN_UPPER: actual_column_name}}
57 """
58 if self.available_columns:
59 return self.available_columns
60
61 conn = self.connect()
62
63 # CSV file patterns in priority order
64 csv_patterns = {
65 'samples': 'xcapture_samples_*.csv',
66 'syscend': 'xcapture_syscend_*.csv',
67 'iorqend': 'xcapture_iorqend_*.csv',
68 'kstacks': 'xcapture_kstacks_*.csv',
69 'ustacks': 'xcapture_ustacks_*.csv'
70 }
71
72 for csv_type, pattern in csv_patterns.items():

Calls

no outgoing calls

Tested by 2

test_query_execution · Function · 0.72