Parse an SQL query using DuckDB. Returns parse result and unexpected errors. Note: - Only SELECT statements support json_serialize_sql - Invalid function names do not throw errors - Some syntax errors do not throw errors since they are not errors in the AST parser
(
query: str,
)
| 94 | |
| 95 | |
| 96 | def _parse_sql_duckdb( |
| 97 | query: str, |
| 98 | ) -> tuple[SqlParseResult | None, str | None]: |
| 99 | """Parse an SQL query using DuckDB. Returns parse result and unexpected errors. |
| 100 | |
| 101 | Note: |
| 102 | - Only SELECT statements support json_serialize_sql |
| 103 | - Invalid function names do not throw errors |
| 104 | - Some syntax errors do not throw errors since they are not errors in the AST parser |
| 105 | """ |
| 106 | if not DependencyManager.duckdb.has(): |
| 107 | return None, "DuckDB not installed" |
| 108 | |
| 109 | import duckdb |
| 110 | |
| 111 | json_serialize_query = ( |
| 112 | JSON_SERIALIZE_QUERY |
| 113 | if duckdb.__version__ >= "1.1.0" |
| 114 | else JSON_SERIALIZE_LEGACY_QUERY |
| 115 | ) |
| 116 | |
| 117 | idx_to_offset_dict: dict[int, int] = {} |
| 118 | try: |
| 119 | query, idx_to_offset_dict = replace_brackets_with_quotes(query) |
| 120 | except Exception as e: |
| 121 | LOGGER.debug(f"Error sanitizing SQL query: {e}") |
| 122 | |
| 123 | relation = duckdb.execute(json_serialize_query, [query]) |
| 124 | fetch_result = relation.fetchone() |
| 125 | if fetch_result is None: |
| 126 | return None, "No result from DuckDB parse query" |
| 127 | |
| 128 | parse_response = fetch_result[0] |
| 129 | parsed_error = msgspec.json.decode(parse_response, type=DuckDBParseError) |
| 130 | |
| 131 | if not parsed_error.error: |
| 132 | return SqlParseResult(success=True, errors=[]), None |
| 133 | |
| 134 | if parsed_error.error_type == "not implemented": |
| 135 | # This is a valid query, but not supported by DuckDB |
| 136 | # Only SELECT statements support json_serialize_sql |
| 137 | return SqlParseResult(success=True, errors=[]), None |
| 138 | |
| 139 | position = int(parsed_error.position or 0) |
| 140 | subquery = query[:position] |
| 141 | line_number = subquery.count("\n") + 1 |
| 142 | |
| 143 | last_newline_idx = subquery.rfind("\n") |
| 144 | column_number = position - last_newline_idx - 1 |
| 145 | |
| 146 | # Adjust column_number to account for any added quotes from bracket replacements |
| 147 | # SELECT {id} FRO users |
| 148 | # ^ error position should be here |
| 149 | # SELECT {id} FRO users |
| 150 | # ^ user sees this |
| 151 | # So in this case, we subtract the offset from the column position. |
| 152 | # If the error is before the brackets, we don't need to add the offset because it just increases the string length |
| 153 | # Column position will be the same as the user sees. |
no test coverage detected
searching dependent graphs…