hub / github.com/borgbackup/borg / IntegrityCheckedFile

Class IntegrityCheckedFile

src/borg/crypto/file_integrity.py:131–217 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

129
130
131	class IntegrityCheckedFile(FileLikeWrapper):
def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
    """Open *path* (or adopt *override_fd*) and wrap it in a hashing file object.

    Args:
        path: Location of the file; also the default source of the hashed
            file name (see ``hash_filename``).
        write: True opens the file with mode ``"wb"``, False with ``"rb"``.
        filename: Optional name to hash instead of the basename of *path*.
        override_fd: Optional already-open file object used instead of
            opening *path*. When it is given, this constructor does not
            consider the file "opened by us" (``file_opened`` is False).
        integrity_data: Optional serialized integrity data, handed to
            ``load_integrity_data`` when reading.

    Raises:
        FileIntegrityError: when reading and the integrity data cannot be
            parsed (raised by ``parse_integrity_data``).
        OSError: if *path* cannot be opened.
    """
    self.path = path
    self.writing = write
    mode = "wb" if write else "rb"
    self.file_fd = override_fd or open(path, mode)
    self.file_opened = override_fd is None
    self.digests = {}

    try:
        # Default hasher; replaced below if the integrity data names another
        # supported algorithm.
        hash_cls = XXH64FileHashingWrapper

        if not write:
            algorithm_and_digests = self.load_integrity_data(path, integrity_data)
            if algorithm_and_digests:
                algorithm, self.digests = algorithm_and_digests
                hash_cls = SUPPORTED_ALGORITHMS[algorithm]

        # TODO: When we're reading but don't have any digests, i.e. no integrity file existed,
        # TODO: then we could just short-circuit.

        self.hasher = hash_cls(backing_fd=self.file_fd, write=write)
        super().__init__(self.hasher)
        self.hash_filename(filename)
    except BaseException:
        # Initialization failed after the file was opened. Nobody else holds
        # a reference that could close it, so release it here instead of
        # leaking the handle. A caller-supplied override_fd stays open: it is
        # owned by the caller.
        if self.file_opened:
            self.file_fd.close()
        raise
154
def load_integrity_data(self, path, integrity_data):
    """Return parsed integrity data, or None when none was supplied.

    Args:
        path: File path, passed on to ``parse_integrity_data``.
        integrity_data: Serialized integrity data, or None.

    Returns:
        Whatever ``parse_integrity_data`` returns for *integrity_data*,
        or None if *integrity_data* is None.
    """
    if integrity_data is None:
        # Nothing to parse.
        return None
    return self.parse_integrity_data(path, integrity_data)
158
def hash_filename(self, filename=None):
    """Feed the file's base name, preceded by its length, into the hasher.

    Args:
        filename: Name to hash; when falsy, ``self.path`` is used instead.
            Only the final path component is hashed.
    """
    # Only the basename takes part in the hash, never the directory part:
    # in Borg the name carries the context (e.g. index.N, cache, files),
    # whereas the location is irrelevant - moving a repository or cache
    # directory is supported. Renaming a file, however, changes its context
    # and must not go unnoticed.
    # Borg only uses ASCII for these names, but the encoding still has to be
    # identical on every platform, so os.fsencode() would be the wrong tool.
    basename = Path(filename or self.path).name
    # Length of the name as a decimal number, right-aligned in 10 columns.
    length_field = f"{len(basename):10d}"
    self.hasher.update(length_field.encode())
    self.hasher.update(basename.encode())
169
@classmethod
def parse_integrity_data(cls, path: str, data: str):
    """Decode JSON integrity data into an ``(algorithm, digests)`` pair.

    Args:
        path: Path of the file the data belongs to; used in log messages
            and in the raised error.
        data: JSON text with an ``"algorithm"`` entry and a ``"digests"``
            entry; the digests must contain a ``"final"`` key.

    Returns:
        ``(algorithm, digests)`` on success, or None (after logging a
        warning) if the algorithm is not in ``SUPPORTED_ALGORITHMS``.

    Raises:
        FileIntegrityError: if decoding or any of the lookups fails with
            ValueError, TypeError or KeyError.
    """
    try:
        parsed = json.loads(data)
        # The algorithm is stored on disk so that it can be changed later.
        algorithm = parsed["algorithm"]
        if algorithm in SUPPORTED_ALGORITHMS:
            digests = parsed["digests"]
            # The lookup itself is the check: the final digest must exist.
            digests["final"]
            return algorithm, digests
        logger.warning("Cannot verify integrity of %s: Unknown algorithm %r", path, algorithm)
        return None
    except (ValueError, TypeError, KeyError) as exc:
        logger.warning("Could not parse integrity data for %s: %s", path, exc)
        raise FileIntegrityError(path)
186
187	def hash_part(self, partname, is_final=False):
188	if not self.writing and not self.digests:

Callers 6

_read_files_cacheMethod · 0.85

_write_files_cacheMethod · 0.85

open_indexMethod · 0.85

_unpack_hintsMethod · 0.85

write_indexMethod · 0.85

test_write_and_verify_with_syncfileMethod · 0.85

Calls

no outgoing calls

Tested by 1

test_write_and_verify_with_syncfileMethod · 0.68