| 1848 | raise IntegrityError(msg) |
| 1849 | |
| 1850 | def verify_data(self): |
| 1851 | logger.info("Starting cryptographic data integrity verification...") |
| 1852 | chunks_count = len(self.chunks) |
| 1853 | errors = 0 |
| 1854 | defect_chunks = [] |
| 1855 | pi = ProgressIndicatorPercent( |
| 1856 | total=chunks_count, msg="Verifying data %6.2f%%", step=0.01, msgid="check.verify_data" |
| 1857 | ) |
| 1858 | for chunk_id, _ in self.chunks.iteritems(): |
| 1859 | pi.show() |
| 1860 | try: |
| 1861 | encrypted_data = self.repository.get(chunk_id) |
| 1862 | except (Repository.ObjectNotFound, IntegrityErrorBase) as err: |
| 1863 | self.error_found = True |
| 1864 | errors += 1 |
| 1865 | logger.error("chunk %s: %s", bin_to_hex(chunk_id), err) |
| 1866 | if isinstance(err, IntegrityErrorBase): |
| 1867 | defect_chunks.append(chunk_id) |
| 1868 | else: |
| 1869 | try: |
| 1870 | # we must decompress, so it'll call assert_id() in there: |
| 1871 | self.repo_objs.parse(chunk_id, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE) |
| 1872 | except IntegrityErrorBase as integrity_error: |
| 1873 | self.error_found = True |
| 1874 | errors += 1 |
| 1875 | logger.error("chunk %s, integrity error: %s", bin_to_hex(chunk_id), integrity_error) |
| 1876 | defect_chunks.append(chunk_id) |
| 1877 | pi.finish() |
| 1878 | if defect_chunks: |
| 1879 | if self.repair: |
| 1880 | # if we kill the defect chunk here, subsequent actions within this "borg check" |
| 1881 | # run will find missing chunks. |
| 1882 | logger.warning( |
| 1883 | "Found defect chunks and will delete them now. " |
| 1884 | "Reading files referencing these chunks will result in an I/O error." |
| 1885 | ) |
| 1886 | for defect_chunk in defect_chunks: |
| 1887 | # remote repo (ssh): retry might help for strange network / NIC / RAM errors |
| 1888 | # as the chunk will be retransmitted from remote server. |
| 1889 | # local repo (fs): as chunks.iteritems loop usually pumps a lot of data through, |
| 1890 | # a defect chunk is likely not in the fs cache any more and really gets re-read |
| 1891 | # from the underlying media. |
| 1892 | try: |
| 1893 | encrypted_data = self.repository.get(defect_chunk) |
| 1894 | # we must decompress, so it'll call assert_id() in there: |
| 1895 | self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE) |
| 1896 | except IntegrityErrorBase: |
| 1897 | # failed twice -> get rid of this chunk |
| 1898 | del self.chunks[defect_chunk] |
| 1899 | self.repository.delete(defect_chunk) |
| 1900 | logger.debug("chunk %s deleted.", bin_to_hex(defect_chunk)) |
| 1901 | else: |
| 1902 | logger.warning("chunk %s not deleted, did not consistently fail.", bin_to_hex(defect_chunk)) |
| 1903 | else: |
| 1904 | logger.warning("Found defect chunks. With --repair, they would get deleted.") |
| 1905 | for defect_chunk in defect_chunks: |
| 1906 | logger.debug("chunk %s is defect.", bin_to_hex(defect_chunk)) |
| 1907 | log = logger.error if errors else logger.info |