hub / github.com/borgbackup/borg / build_chunkindex_from_repo

Function build_chunkindex_from_repo

src/borg/cache.py:799–845 · view source on GitHub ↗

(repository, *, disable_caches=False, cache_immediately=False)

Source from the content-addressed store, hash-verified

797
798
def build_chunkindex_from_repo(repository, *, disable_caches=False, cache_immediately=False):
    """Build and return a ChunkIndex describing the chunks stored in *repository*.

    Two strategies are tried, in this order:

    1. Unless *disable_caches* is true, every chunk index cached in the repository
       (as reported by ``list_chunkindex_hashes``) is read and merged into one
       fresh ChunkIndex. If at least one cached index could be read, the merged
       index is returned.
    2. Otherwise the chunk IDs are listed from the repository via ``repo_lister``
       and each ID is entered with the ``F_USED`` flag and ``size=0``.

    :param repository: the repository object to query.
    :param disable_caches: if true, skip the cached chunk indexes and always list
        the repository.
    :param cache_immediately: if true, write the resulting index back to the
        repository cache (after merging more than one cached index, or after
        listing the repository).
    :return: the ChunkIndex that was built.
    """
    # Fast path: assemble a mostly complete index from the centrally cached chunk indexes.
    if not disable_caches:
        cached_hashes = list_chunkindex_hashes(repository)
        if cached_hashes:  # there is at least one cached chunk index
            chunks = ChunkIndex()  # merge target for everything we can read
            merged_count = 0
            for index_hash in cached_hashes:
                cached_index = read_chunkindex_from_repo_cache(repository, index_hash)
                if cached_index is None:
                    # nothing usable was returned for this hash, try the next one
                    continue
                logger.debug(f"cached chunk index {index_hash} gets merged...")
                for key, entry in cached_index.items():
                    chunks[key] = entry
                merged_count += 1
                # the entries were copied over, so empty the source index right away
                cached_index.clear()
            if merged_count:
                if cache_immediately and merged_count > 1:
                    # write the merged index now, so these indexes need not be merged again:
                    write_chunkindex_to_repo_cache(
                        repository, chunks, clear=False, force_write=True, delete_these=cached_hashes
                    )
                else:
                    # NOTE(review): presumably resets the "new entry" markers set while merging - confirm
                    chunks.clear_new()
                return chunks

    # Slow path: nothing usable came from the cache, so list all chunk IDs from the repo.
    logger.debug("querying the chunk IDs list from the repo...")
    chunks = ChunkIndex()
    t0 = perf_counter()
    # The repo reports these chunks as present, so they are assumed to be referenced/used.
    # The plaintext size is unknown (it is != stored_size), hence size = 0.
    init_entry = ChunkIndexEntry(flags=ChunkIndex.F_USED, size=0)
    num_chunks = 0  # stays 0 if the repository lists nothing
    for num_chunks, (chunk_id, _stored_size) in enumerate(
        repo_lister(repository, limit=LIST_SCAN_LIMIT), start=1
    ):
        chunks[chunk_id] = init_entry
    # The cache does not contain the manifest.
    if not isinstance(repository, (Repository, RemoteRepository)):
        del chunks[Manifest.MANIFEST_ID]
    duration = perf_counter() - t0
    if not duration:
        # avoid a division by zero in the speed computation below
        duration = 0.001
    # A chunk ID in a list is encoded in 34 bytes: 1 byte msgpack header, 1 byte length,
    # 32 ID bytes. Protocol overhead is neglected in this calculation.
    speed = format_file_size(num_chunks * 34 / duration)
    logger.debug(f"queried {num_chunks} chunk IDs in {duration} s, ~{speed}/s")
    if cache_immediately:
        # write the index now, so the slow way is only rarely needed:
        write_chunkindex_to_repo_cache(repository, chunks, clear=False, force_write=True, delete_other=True)
    return chunks
846
847
848	class ChunksMixin:

Callers 3

chunksMethod · 0.85

checkMethod · 0.85

get_repository_chunksMethod · 0.85

Calls 8

list_chunkindex_hashesFunction · 0.85

read_chunkindex_from_repo_cacheFunction · 0.85

write_chunkindex_to_repo_cacheFunction · 0.85

repo_listerFunction · 0.85

format_file_sizeFunction · 0.85

debugMethod · 0.80

clearMethod · 0.80

itemsMethod · 0.45

Tested by

no test coverage detected