Print out some info and statistics about the archive collection
(out_dir: Path=OUTPUT_DIR)
| 473 | |
| 474 | @enforce_types |
| 475 | def status(out_dir: Path=OUTPUT_DIR) -> None: |
| 476 | """Print out some info and statistics about the archive collection""" |
| 477 | |
| 478 | check_data_folder(out_dir=out_dir) |
| 479 | |
| 480 | from core.models import Snapshot |
| 481 | from django.contrib.auth import get_user_model |
| 482 | User = get_user_model() |
| 483 | |
| 484 | print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI)) |
| 485 | print(ANSI['lightyellow'], f' {out_dir}/*', ANSI['reset']) |
| 486 | num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.') |
| 487 | size = printable_filesize(num_bytes) |
| 488 | print(f' Index size: {size} across {num_files} files') |
| 489 | print() |
| 490 | |
| 491 | links = load_main_index(out_dir=out_dir) |
| 492 | num_sql_links = links.count() |
| 493 | num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir)) |
| 494 | print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})') |
| 495 | print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)') |
| 496 | print() |
| 497 | print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI)) |
| 498 | print(ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', ANSI['reset']) |
| 499 | num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR) |
| 500 | size = printable_filesize(num_bytes) |
| 501 | print(f' Size: {size} across {num_files} files in {num_dirs} directories') |
| 502 | print(ANSI['black']) |
| 503 | num_indexed = len(get_indexed_folders(links, out_dir=out_dir)) |
| 504 | num_archived = len(get_archived_folders(links, out_dir=out_dir)) |
| 505 | num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir)) |
| 506 | print(f' > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})') |
| 507 | print(f' > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})') |
| 508 | print(f' > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})') |
| 509 | |
| 510 | num_present = len(get_present_folders(links, out_dir=out_dir)) |
| 511 | num_valid = len(get_valid_folders(links, out_dir=out_dir)) |
| 512 | print() |
| 513 | print(f' > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})') |
| 514 | print(f' > valid: {num_valid}'.ljust(36), f'({get_valid_folders.__doc__})') |
| 515 | |
| 516 | duplicate = get_duplicate_folders(links, out_dir=out_dir) |
| 517 | orphaned = get_orphaned_folders(links, out_dir=out_dir) |
| 518 | corrupted = get_corrupted_folders(links, out_dir=out_dir) |
| 519 | unrecognized = get_unrecognized_folders(links, out_dir=out_dir) |
| 520 | num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized}) |
| 521 | print(f' > invalid: {num_invalid}'.ljust(36), f'({get_invalid_folders.__doc__})') |
| 522 | print(f' > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})') |
| 523 | print(f' > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})') |
| 524 | print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})') |
| 525 | print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})') |
| 526 | |
| 527 | print(ANSI['reset']) |
| 528 | |
| 529 | if num_indexed: |
| 530 | print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI)) |
| 531 | print(' archivebox list --status=<status> (e.g. indexed, corrupted, archived, etc.)') |
| 532 |
no test coverage detected