MCPcopy
hub / github.com/tanelpoder/0xtools / discover_columns

Method discover_columns

xtop/core/data_source.py:53–112  ·  view source on GitHub ↗

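Discover the available columns for each capture type, reading the CSV files first and falling back to Parquet files with the same name pattern when no CSV schema can be read. Returns dict: {csv_type: {column_name_lowercased: actual_column_name}}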

(self)

Source from the content-addressed store, hash-verified

51 self.conn = None
52
53 def discover_columns(self) -> Dict[str, Dict[str, str]]:
54 """
55 Discover available columns from all CSV types.
56 Returns dict: {csv_type: {COLUMN_UPPER: actual_column_name}}
57 """
58 if self.available_columns:
59 return self.available_columns
60
61 conn = self.connect()
62
63 # CSV file patterns in priority order
64 csv_patterns = {
65 'samples': 'xcapture_samples_*.csv',
66 'syscend': 'xcapture_syscend_*.csv',
67 'iorqend': 'xcapture_iorqend_*.csv',
68 'kstacks': 'xcapture_kstacks_*.csv',
69 'ustacks': 'xcapture_ustacks_*.csv'
70 }
71
72 for csv_type, pattern in csv_patterns.items():
73 self.available_columns[csv_type] = {}
74 self.schema_info[csv_type] = []
75
76 describe_result = None
77 active_pattern = pattern
78 reader = 'read_csv_auto'
79
80 csv_files = self.get_csv_files(pattern)
81 if csv_files:
82 describe_result = self._try_describe(conn, reader, pattern)
83
84 if not describe_result:
85 parquet_pattern = pattern.replace('.csv', '.parquet')
86 parquet_files = self.get_csv_files(parquet_pattern)
87 if parquet_files:
88 reader = 'read_parquet'
89 active_pattern = parquet_pattern
90 describe_result = self._try_describe(conn, reader, parquet_pattern)
91
92 if describe_result:
93 columns = describe_result
94 self.available_columns[csv_type] = {
95 col_name.lower(): col_name for col_name, *_ in columns
96 }
97 self.schema_info[csv_type] = [(col_name, col_type) for col_name, col_type, *_ in columns]
98 self.csv_metadata[csv_type] = {
99 'pattern': active_pattern,
100 'column_count': len(columns),
101 'columns': [col[0] for col in columns],
102 'format': reader.replace('read_', '')
103 }
104 else:
105 self.csv_metadata[csv_type] = {
106 'pattern': pattern,
107 'column_count': 0,
108 'columns': [],
109 'format': None
110 }

Callers 2

get_schema_info — Method · 0.95
validate_columns — Method · 0.95

Calls 3

connect — Method · 0.95
get_csv_files — Method · 0.95
_try_describe — Method · 0.95

Tested by

no test coverage detected