Find table and schema references in a SQL statement. Args: sql_statement: The SQL statement to parse. Returns: A set of unique SQLRefs, one for each table reference in the statement. Eg. SELECT * FROM schema1.test_table INNER JOIN schema2.test_table2 wo
(sql_statement: str)
| 478 | |
| 479 | |
| 480 | def find_sql_refs(sql_statement: str) -> set[SQLRef]: |
| 481 | """ |
| 482 | Find table and schema references in a SQL statement. |
| 483 | |
| 484 | Args: |
| 485 | sql_statement: The SQL statement to parse. |
| 486 | |
| 487 | Returns: |
| 488 | A set of unique SQLRefs, one for each table reference in the statement. |
| 489 | Eg. SELECT * FROM schema1.test_table INNER JOIN schema2.test_table2 |
| 490 | would return two SQLRefs, one for the first table and one for the second. |
| 491 | |
| 492 | Note: |
| 493 | When providing only a single qualification, |
| 494 | DuckDB will interpret as either a catalog or a schema, as long as there are no conflicts. |
| 495 | |
| 496 | Eg. SELECT * FROM my_db.my_table, my_db can be a catalog or schema. If a catalog exists, |
| 497 | then it would resolve to my_db.main.my_table. |
| 498 | |
| 499 | At the moment, we don't know this, so my_db is treated as a schema. |
| 500 | """ |
| 501 | |
| 502 | # Use sqlglot to parse ast (https://github.com/tobymao/sqlglot/blob/main/posts/ast_primer.md) |
| 503 | |
| 504 | DependencyManager.sqlglot.require(why="SQL parsing") |
| 505 | |
| 506 | from sqlglot import exp, parse |
| 507 | from sqlglot.errors import OptimizeError |
| 508 | from sqlglot.optimizer.scope import build_scope |
| 509 | |
| 510 | def get_ref_from_table(table: exp.Table) -> SQLRef | None: |
| 511 | # The variables might be empty strings, if they are, we set them to None |
| 512 | try: |
| 513 | table_name = table.name or None |
| 514 | except AttributeError: |
| 515 | # sqlglot may return Table nodes with this=None (e.g. DROP SCHEMA) |
| 516 | return None |
| 517 | schema_name = table.db or None |
| 518 | catalog_name = table.catalog or None |
| 519 | |
| 520 | if table_name is None: |
| 521 | return None |
| 522 | |
| 523 | # Check if the table name looks like a URL or has a file extension. |
| 524 | # These are often not actual table references, so we skip them. |
| 525 | # Note that they can be valid table names, but we skip them to avoid circular deps |
| 526 | if "://" in table_name or table_name.endswith(COMMON_FILE_EXTENSIONS): |
| 527 | return None |
| 528 | |
| 529 | return SQLRef( |
| 530 | table=table_name, schema=schema_name, catalog=catalog_name |
| 531 | ) |
| 532 | |
| 533 | from sqlglot.errors import ParseError |
| 534 | |
| 535 | try: |
| 536 | with _loggers.suppress_warnings_logs("sqlglot"): |
| 537 | expression_list = parse(sql_statement, dialect="duckdb") |
searching dependent graphs…