| 129 | |
| 130 | |
| 131 | class IntegrityCheckedFile(FileLikeWrapper): |
def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
    """Wrap a file so that everything read or written passes through a hasher.

    :param path: path of the file; opened here unless *override_fd* is given.
    :param write: true to open for writing ("wb"), false for reading ("rb");
        also passed on to the hasher.
    :param filename: name to mix into the hash instead of the basename of *path*
        (see hash_filename).
    :param override_fd: already opened file object to use instead of opening *path*.
        It is never closed by this constructor.
    :param integrity_data: integrity data handed to load_integrity_data(); only
        consulted when reading.
    :raises: whatever load_integrity_data() raises for unusable integrity data;
        a file opened by this constructor is closed before the exception propagates.
    """
    self.path = path
    self.writing = write
    mode = "wb" if write else "rb"
    # Decide ownership once, with an identity test, so that file_fd and file_opened
    # can never disagree (a falsy, non-None override_fd must still be honoured).
    self.file_opened = override_fd is None
    self.file_fd = open(path, mode) if self.file_opened else override_fd
    self.digests = {}

    try:
        hash_cls = XXH64FileHashingWrapper

        if not write:
            algorithm_and_digests = self.load_integrity_data(path, integrity_data)
            if algorithm_and_digests:
                algorithm, self.digests = algorithm_and_digests
                hash_cls = SUPPORTED_ALGORITHMS[algorithm]

        # TODO: When we are reading but have no digests (i.e. no integrity data existed),
        # TODO: we could short-circuit and skip hashing altogether.

        self.hasher = hash_cls(backing_fd=self.file_fd, write=write)
        super().__init__(self.hasher)
        self.hash_filename(filename)
    except BaseException:
        # Do not leak the handle we opened ourselves when construction fails;
        # a caller-supplied override_fd stays the caller's responsibility.
        if self.file_opened:
            self.file_fd.close()
        raise
| 154 | |
def load_integrity_data(self, path, integrity_data):
    """Return the parsed form of *integrity_data*, or None when none was supplied.

    Parsing is delegated to parse_integrity_data(); its result (and any exception
    it raises) is passed through unchanged.
    """
    if integrity_data is None:
        return None
    parsed = self.parse_integrity_data(path, integrity_data)
    return parsed
| 158 | |
def hash_filename(self, filename=None):
    """Feed the file's base name into the hasher, preceded by its length.

    Only the final path component of *filename* (or of self.path when *filename*
    is empty or None) is hashed, never the directory part. In Borg the name itself
    encodes the context (e.g. index.N, cache, files), whereas the location does not
    matter: moving a repository or cache directory is supported. Renaming a file,
    however, implies a change of context that is not permissible.

    Borg only uses ASCII in these names, but the same encoding must be used
    everywhere for portability, so str.encode() is used here; os.fsencode()
    would be wrong.
    """
    basename = Path(filename or self.path).name
    # Fixed-width (10 characters, right-aligned) decimal length, then the name itself.
    length_prefix = "%10d" % len(basename)
    for piece in (length_prefix, basename):
        self.hasher.update(piece.encode())
| 169 | |
@classmethod
def parse_integrity_data(cls, path: str, data: str):
    """Decode JSON integrity data into an ``(algorithm, digests)`` pair.

    :param path: path of the checked file; only used in log messages and errors.
    :param data: JSON document with an "algorithm" entry and a "digests" mapping
        that contains at least a "final" digest.
    :return: ``(algorithm, digests)``, or None (after logging a warning) when the
        algorithm is not one of SUPPORTED_ALGORITHMS.
    :raises FileIntegrityError: when *data* is not valid JSON or lacks the
        expected structure.
    """
    try:
        parsed = json.loads(data)
        # The algorithm is stored on disk so that it can be changed later on.
        algo = parsed["algorithm"]
        if algo in SUPPORTED_ALGORITHMS:
            digest_map = parsed["digests"]
            # Lookup for its side effect only: the final digest must be present.
            digest_map["final"]
            return algo, digest_map
        logger.warning("Cannot verify integrity of %s: Unknown algorithm %r", path, algo)
        return None
    except (ValueError, TypeError, KeyError) as err:
        logger.warning("Could not parse integrity data for %s: %s", path, err)
        raise FileIntegrityError(path)
| 186 | |
| 187 | def hash_part(self, partname, is_final=False): |
| 188 | if not self.writing and not self.digests: |
no outgoing calls