MCPcopy
hub / github.com/borgbackup/borg / build_chunkindex_from_repo

Function build_chunkindex_from_repo

src/borg/cache.py:799–845  ·  view source on GitHub ↗
(repository, *, disable_caches=False, cache_immediately=False)

Source from the content-addressed store, hash-verified

797
798
def build_chunkindex_from_repo(repository, *, disable_caches=False, cache_immediately=False):
    """Build and return a ChunkIndex for *repository*.

    Fast path: unless *disable_caches* is set, list the chunk index hashes cached
    in the repository and merge every cached index that can be read into one
    fresh ChunkIndex. If at least one was merged, that index is returned.

    Slow path: if caches are disabled, none are listed, or none could be read,
    list all chunk IDs from the repository and enter each one as "used" with
    size 0.

    :param repository: the repository to build the index for.
    :param disable_caches: if true, skip the cached chunk indexes entirely.
    :param cache_immediately: if true, write the resulting index back to the
        repository cache (fast path: only when more than one index was merged).
    :return: the ChunkIndex that was built.
    """
    # First choice: a fresh, mostly complete index assembled from the centrally cached chunk indexes.
    hashes = None if disable_caches else list_chunkindex_hashes(repository)
    if hashes:  # there is at least one cached chunk index
        chunks = ChunkIndex()  # everything we find gets merged into this
        merged_count = 0
        for hash in hashes:
            cached_index = read_chunkindex_from_repo_cache(repository, hash)
            if cached_index is None:
                continue  # nothing usable was read for this hash
            logger.debug(f"cached chunk index {hash} gets merged...")
            for chunk_id, entry in cached_index.items():
                chunks[chunk_id] = entry
            merged_count += 1
            # the merged source is not needed any more, empty it right away
            cached_index.clear()
        if merged_count > 0:
            if cache_immediately and merged_count > 1:
                # store the merged result now, so the same indexes need not be merged again:
                write_chunkindex_to_repo_cache(
                    repository, chunks, clear=False, force_write=True, delete_these=hashes
                )
            else:
                # NOTE(review): presumably resets the "new entry" markers, as nothing is written here - confirm.
                chunks.clear_new()
            return chunks

    # Nothing came out of the cache: compute the ChunkIndex the slow way.
    logger.debug("querying the chunk IDs list from the repo...")
    chunks = ChunkIndex()
    started = perf_counter()
    num_chunks = 0
    # The repo reports these chunks as present, so they are assumed to be referenced/used.
    # The plaintext size is unknown (it is not the stored size), hence size=0.
    placeholder = ChunkIndexEntry(flags=ChunkIndex.F_USED, size=0)
    for chunk_id, _stored_size in repo_lister(repository, limit=LIST_SCAN_LIMIT):
        num_chunks += 1
        chunks[chunk_id] = placeholder
    # The cache does not contain the manifest: for repository types other than
    # Repository / RemoteRepository, drop the manifest ID from the index.
    if not isinstance(repository, (Repository, RemoteRepository)):
        del chunks[Manifest.MANIFEST_ID]
    # Fall back to 1 ms if the timer difference is zero, to avoid dividing by zero below.
    duration = (perf_counter() - started) or 0.001
    # A chunk ID in a list takes 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes.
    # Protocol overhead is not part of this estimate.
    speed = format_file_size(num_chunks * 34 / duration)
    logger.debug(f"queried {num_chunks} chunk IDs in {duration} s, ~{speed}/s")
    if cache_immediately:
        # store the result now, so the slow way is only rarely needed:
        write_chunkindex_to_repo_cache(repository, chunks, clear=False, force_write=True, delete_other=True)
    return chunks
846
847
848class ChunksMixin:

Callers 3

chunksMethod · 0.85
checkMethod · 0.85
get_repository_chunksMethod · 0.85

Calls 8

list_chunkindex_hashesFunction · 0.85
repo_listerFunction · 0.85
format_file_sizeFunction · 0.85
debugMethod · 0.80
clearMethod · 0.80
itemsMethod · 0.45

Tested by

no test coverage detected