Return number of rows for each requested table name. Args: p (dlt.Pipeline): Pipeline instance. *table_names (str): Optional list of table names. When omitted, counts for all data tables in the default schema are returned. Returns: DictStrAny: Mapping of table name to integer row count.
(p: dlt.Pipeline, *table_names: str)
| 410 | |
| 411 | |
def load_table_counts(p: dlt.Pipeline, *table_names: str) -> DictStrAny:
    """Count the rows held by the given tables of a pipeline.

    Args:
        p (dlt.Pipeline): Pipeline whose tables are inspected.
        *table_names (str): Tables to count. If none are passed, every data
            table of the pipeline's default schema is counted.

    Returns:
        DictStrAny: Row count keyed by table name.
    """
    # No explicit selection means "all data tables of the default schema".
    selected = table_names or [tbl["name"] for tbl in p.default_schema.data_tables()]

    if _is_sftp(p):
        # filesystem with sftp requires a fallback: load the tables through
        # the filesystem helper and count the loaded items per table
        items_by_table = _load_tables_to_dicts_fs(p, *selected)
        return {name: len(items) for name, items in items_by_table.items()}

    if _is_abfss(p):
        # NOTE: filesystem with abfss and no table format requires a fallback
        # where each table count is fetched individually. This seems to be a
        # bug in duckdb abfss and might be resolved in a future version.
        return {
            name: p.dataset().row_counts(table_names=[name]).fetchall()[0][1]
            for name in selected
        }

    # Otherwise a single dataset row-count query covers all tables; each
    # result row is read as (key, count).
    result = {}
    for row in p.dataset().row_counts(table_names=list(selected)).fetchall():
        result[row[0]] = row[1]
    return result
| 442 | |
| 443 | |
| 444 | def assert_empty_tables(p: dlt.Pipeline, *table_names: str) -> None: |