(repository, *, disable_caches=False, cache_immediately=False)
| 797 | |
| 798 | |
def build_chunkindex_from_repo(repository, *, disable_caches=False, cache_immediately=False):
    """Build a ChunkIndex for *repository*.

    Two strategies are tried, in this order:

    1. Unless *disable_caches* is true, every chunk index cached in the repository
       (as reported by ``list_chunkindex_hashes``) is read and merged into one new
       ChunkIndex. If at least one cached index could be read, the merged index is
       returned.
    2. Otherwise the chunk IDs are listed via ``repo_lister`` and each one is entered
       with the F_USED flag and size 0.

    :param repository: the repository to query.
    :param disable_caches: if true, skip strategy 1 and always list the repository.
    :param cache_immediately: if true, write the resulting index back to the repository
        cache right away (after merging more than one cached index, or after listing).
    :return: the ChunkIndex that was built.
    """
    # Fast path: assemble a fresh, mostly complete index from the centrally cached chunk indexes.
    cached_hashes = None if disable_caches else list_chunkindex_hashes(repository)
    if cached_hashes:  # there is at least one cached chunk index
        merged_index = ChunkIndex()  # everything readable gets merged into this one
        merge_count = 0
        for index_hash in cached_hashes:
            partial_index = read_chunkindex_from_repo_cache(repository, index_hash)
            if partial_index is None:
                # nothing usable was returned for this hash, try the next one
                continue
            logger.debug(f"cached chunk index {index_hash} gets merged...")
            for key, entry in partial_index.items():
                merged_index[key] = entry
            merge_count += 1
            # the partial index is not needed any more once its entries were copied
            partial_index.clear()
        if merge_count > 0:
            if cache_immediately and merge_count > 1:
                # Store the merged result right away, so these indexes need not be merged again.
                write_chunkindex_to_repo_cache(
                    repository,
                    merged_index,
                    clear=False,
                    force_write=True,
                    delete_these=cached_hashes,
                )
            else:
                # NOTE(review): presumably resets the "new" marker on the merged entries - confirm.
                merged_index.clear_new()
            return merged_index

    # Slow path: the cache yielded nothing, so ask the repository for its chunk IDs.
    logger.debug("querying the chunk IDs list from the repo...")
    listed_index = ChunkIndex()
    started = perf_counter()
    num_chunks = 0
    # The repo reports these chunks as present, so they are assumed to be referenced/used.
    # The plaintext size is unknown (it is not the stored size), hence size=0.
    init_entry = ChunkIndexEntry(flags=ChunkIndex.F_USED, size=0)
    for chunk_id, _stored_size in repo_lister(repository, limit=LIST_SCAN_LIMIT):
        num_chunks += 1
        listed_index[chunk_id] = init_entry
    # The cache does not contain the manifest.
    is_known_repo_type = isinstance(repository, (Repository, RemoteRepository))
    if not is_known_repo_type:
        del listed_index[Manifest.MANIFEST_ID]
    # Fall back to 1 ms if the timer reports zero elapsed time, to avoid dividing by zero.
    elapsed = (perf_counter() - started) or 0.001
    # A chunk ID in a list takes 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes.
    # Protocol overhead is neglected in this estimate.
    speed = format_file_size(num_chunks * 34 / elapsed)
    logger.debug(f"queried {num_chunks} chunk IDs in {elapsed} s, ~{speed}/s")
    if cache_immediately:
        # Store the result right away, so the slow way is only rarely needed.
        write_chunkindex_to_repo_cache(
            repository, listed_index, clear=False, force_write=True, delete_other=True
        )
    return listed_index
| 846 | |
| 847 | |
| 848 | class ChunksMixin: |
no test coverage detected