MCPcopy
hub / github.com/borgbackup/borg / IntegrityCheckedFile

Class IntegrityCheckedFile

src/borg/crypto/file_integrity.py:131–217  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

129
130
131class IntegrityCheckedFile(FileLikeWrapper):
def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
    """Set up the backing file and the hashing wrapper placed in front of it.

    :param path: file to open when no ``override_fd`` is supplied; also handed to
        ``load_integrity_data`` and used by ``hash_filename`` when ``filename`` is not given.
    :param write: truthy opens the file with mode ``"wb"``, otherwise ``"rb"``.
        Integrity data is only loaded when not writing.
    :param filename: forwarded to ``hash_filename``.
    :param override_fd: already-open file object to use instead of opening ``path``.
        It is never closed by this constructor.
    :param integrity_data: forwarded to ``load_integrity_data`` (read mode only).
    :raises FileIntegrityError: propagated from ``parse_integrity_data`` when the
        supplied integrity data cannot be parsed.
    """
    self.path = path
    self.writing = write
    mode = "wb" if write else "rb"
    self.file_fd = override_fd or open(path, mode)
    # Remember whether the handle is ours, so we only ever close what we opened.
    self.file_opened = override_fd is None
    self.digests = {}

    try:
        # Default hasher; replaced below if the integrity data names another algorithm.
        hash_cls = XXH64FileHashingWrapper

        if not write:
            algorithm_and_digests = self.load_integrity_data(path, integrity_data)
            if algorithm_and_digests:
                algorithm, self.digests = algorithm_and_digests
                hash_cls = SUPPORTED_ALGORITHMS[algorithm]

        # TODO: When we're reading but don't have any digests, i.e. no integrity file existed,
        # TODO: then we could just short-circuit.

        self.hasher = hash_cls(backing_fd=self.file_fd, write=write)
        super().__init__(self.hasher)
        self.hash_filename(filename)
    except BaseException:
        # Setup failed after the file was opened (e.g. unparsable integrity data):
        # do not leak the handle we created, then let the original error propagate.
        if self.file_opened:
            self.file_fd.close()
        raise
154
def load_integrity_data(self, path, integrity_data):
    """Return the parsed integrity data, or ``None`` when none was supplied.

    :param path: passed through to ``parse_integrity_data``.
    :param integrity_data: raw integrity data, or ``None`` if there is nothing to parse.
    :return: whatever ``parse_integrity_data`` returns, or ``None``.
    """
    if integrity_data is None:
        return None
    return self.parse_integrity_data(path, integrity_data)
158
def hash_filename(self, filename=None):
    """Feed the file's base name, preceded by its length, into the hasher.

    Only the final path component is hashed, never the directory part: in Borg the
    name itself carries the context (e.g. index.N, cache, files), whereas the
    location does not matter and moving a repository or cache directory is
    supported. Renaming a file, however, implies a change of context that is not
    permissible.

    Borg only uses ASCII in these names, but the same encoding must be used
    everywhere for portability, so ``str.encode()`` is used here; ``os.fsencode()``
    would be wrong.

    :param filename: name (or path) to hash; falls back to ``self.path`` when falsy.
    """
    basename = Path(filename or self.path).name
    # Length field: character count, right-aligned in a 10-wide decimal field.
    length_field = ("%10d" % len(basename)).encode()
    self.hasher.update(length_field)
    self.hasher.update(basename.encode())
169
@classmethod
def parse_integrity_data(cls, path: str, data: str):
    """Decode JSON integrity data into an ``(algorithm, digests)`` pair.

    :param path: only used in log messages and in the raised error.
    :param data: JSON text expected to hold an ``"algorithm"`` entry and a
        ``"digests"`` entry that contains at least a ``"final"`` digest.
    :return: ``(algorithm, digests)``; ``None`` (after logging a warning) when the
        algorithm is not in ``SUPPORTED_ALGORITHMS``.
    :raises FileIntegrityError: when the data is not valid JSON or lacks the
        expected structure.
    """
    try:
        parsed = json.loads(data)
        # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
        algorithm = parsed["algorithm"]
        if algorithm in SUPPORTED_ALGORITHMS:
            digests = parsed["digests"]
            # Lookup for its side effect only: a missing final digest must fail parsing.
            digests["final"]
            return algorithm, digests
        logger.warning("Cannot verify integrity of %s: Unknown algorithm %r", path, algorithm)
        return None
    except (ValueError, TypeError, KeyError) as e:
        logger.warning("Could not parse integrity data for %s: %s", path, e)
        raise FileIntegrityError(path)
186
187 def hash_part(self, partname, is_final=False):
188 if not self.writing and not self.digests:

Callers 6

_read_files_cacheMethod · 0.85
_write_files_cacheMethod · 0.85
open_indexMethod · 0.85
_unpack_hintsMethod · 0.85
write_indexMethod · 0.85

Calls

no outgoing calls

Tested by 1