MCPcopy
hub / github.com/ArchiveBox/ArchiveBox / status

Function status

archivebox/main.py:475–571  ·  view source on GitHub ↗

Print out some info and statistics about the archive collection

status(out_dir: Path=OUTPUT_DIR) -> None

Source from the content-addressed store, hash-verified

473
474@enforce_types
475def status(out_dir: Path=OUTPUT_DIR) -> None:
476 """Print out some info and statistics about the archive collection"""
477
478 check_data_folder(out_dir=out_dir)
479
480 from core.models import Snapshot
481 from django.contrib.auth import get_user_model
482 User = get_user_model()
483
484 print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI))
485 print(ANSI['lightyellow'], f' {out_dir}/*', ANSI['reset'])
486 num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
487 size = printable_filesize(num_bytes)
488 print(f' Index size: {size} across {num_files} files')
489 print()
490
491 links = load_main_index(out_dir=out_dir)
492 num_sql_links = links.count()
493 num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
494 print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
495 print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)')
496 print()
497 print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI))
498 print(ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', ANSI['reset'])
499 num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
500 size = printable_filesize(num_bytes)
501 print(f' Size: {size} across {num_files} files in {num_dirs} directories')
502 print(ANSI['black'])
503 num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
504 num_archived = len(get_archived_folders(links, out_dir=out_dir))
505 num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
506 print(f' > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})')
507 print(f' > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})')
508 print(f' > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})')
509
510 num_present = len(get_present_folders(links, out_dir=out_dir))
511 num_valid = len(get_valid_folders(links, out_dir=out_dir))
512 print()
513 print(f' > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})')
514 print(f' > valid: {num_valid}'.ljust(36), f'({get_valid_folders.__doc__})')
515
516 duplicate = get_duplicate_folders(links, out_dir=out_dir)
517 orphaned = get_orphaned_folders(links, out_dir=out_dir)
518 corrupted = get_corrupted_folders(links, out_dir=out_dir)
519 unrecognized = get_unrecognized_folders(links, out_dir=out_dir)
520 num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized})
521 print(f' > invalid: {num_invalid}'.ljust(36), f'({get_invalid_folders.__doc__})')
522 print(f' > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})')
523 print(f' > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})')
524 print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
525 print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
526
527 print(ANSI['reset'])
528
529 if num_indexed:
530 print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI))
531 print(' archivebox list --status=<status> (e.g. indexed, corrupted, archived, etc.)')
532

Callers 1

main · Function · 0.85

Calls 15

check_data_folder · Function · 0.85
get_dir_size · Function · 0.85
printable_filesize · Function · 0.85
load_main_index · Function · 0.85
parse_json_links_details · Function · 0.85
get_indexed_folders · Function · 0.85
get_archived_folders · Function · 0.85
get_unarchived_folders · Function · 0.85
get_present_folders · Function · 0.85
get_valid_folders · Function · 0.85
get_duplicate_folders · Function · 0.85
get_orphaned_folders · Function · 0.85

Tested by

no test coverage detected