Download a URL to a filesystem path, possibly with verification.
(url: str, path: pathlib.Path, size: int, sha256: str)
| 296 | |
| 297 | |
| 298 | def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str): |
| 299 | """Download a URL to a filesystem path, possibly with verification.""" |
| 300 | |
| 301 | # We download to a temporary file and rename at the end so there's |
| 302 | # no chance of the final file being partially written or containing |
| 303 | # bad data. |
| 304 | print("downloading %s to %s" % (url, path)) |
| 305 | |
| 306 | if path.exists(): |
| 307 | good = True |
| 308 | |
| 309 | if path.stat().st_size != size: |
| 310 | print("existing file size is wrong; removing") |
| 311 | good = False |
| 312 | |
| 313 | if good: |
| 314 | if hash_path(path) != sha256: |
| 315 | print("existing file hash is wrong; removing") |
| 316 | good = False |
| 317 | |
| 318 | if good: |
| 319 | print("%s exists and passes integrity checks" % path) |
| 320 | return |
| 321 | |
| 322 | path.unlink() |
| 323 | |
| 324 | # Need to write to random path to avoid race conditions. If there is a |
| 325 | # race, worst case we'll download the same file N>1 times. Meh. |
| 326 | tmp = path.with_name( |
| 327 | "%s.tmp%s" |
| 328 | % ( |
| 329 | path.name, |
| 330 | "".join(random.choices(string.ascii_uppercase + string.digits, k=8)), |
| 331 | ) |
| 332 | ) |
| 333 | |
| 334 | for attempt in range(8): |
| 335 | try: |
| 336 | try: |
| 337 | with tmp.open("wb") as fh: |
| 338 | for chunk in secure_download_stream(url, size, sha256): |
| 339 | fh.write(chunk) |
| 340 | |
| 341 | break |
| 342 | except IntegrityError as e: |
| 343 | tmp.unlink() |
| 344 | # If we didn't get most of the expected file, retry |
| 345 | if e.length > size * 0.75: |
| 346 | raise |
| 347 | print(f"Integrity error on {url}; retrying: {e}") |
| 348 | time.sleep(2**attempt) |
| 349 | except http.client.HTTPException as e: |
| 350 | print(f"HTTP exception on {url}; retrying: {e}") |
| 351 | time.sleep(2**attempt) |
| 352 | except urllib.error.URLError as e: |
| 353 | print(f"urllib error on {url}; retrying: {e}") |
| 354 | time.sleep(2**attempt) |
| 355 | else: |
no test coverage detected