Download URL to a directory. Will figure out the filename automatically from URL, if not given.
(url, dir, filename=None, expect_size=None)
| 30 | |
| 31 | |
def download(url, dir, filename=None, expect_size=None):
    """
    Download URL to a directory.
    Will figure out the filename automatically from URL, if not given.

    Args:
        url (str): the URL to fetch.
        dir (str): destination directory. It is passed to ``mkdir_p`` before
            anything else (presumably creating it if missing).
        filename (str): name of the file inside ``dir``. Defaults to the text
            after the last ``/`` in ``url``.
        expect_size (int): expected size of the file in bytes. When given, an
            existing file of exactly this size is reused without downloading,
            and a size mismatch after downloading is logged as an error
            (the path is still returned).

    Returns:
        str: path of the downloaded (or reused) file.

    Raises:
        IOError: if downloading or stat-ing the file fails. The failure is
            logged and the original exception is re-raised.
        AssertionError: if the downloaded file is empty.
    """
    mkdir_p(dir)
    if filename is None:
        filename = url.split('/')[-1]
    fpath = os.path.join(dir, filename)

    if os.path.isfile(fpath):
        if expect_size is not None and os.stat(fpath).st_size == expect_size:
            logger.info("File {} exists! Skip download.".format(filename))
            return fpath
        else:
            # Without a matching expected size there is no way to tell whether
            # the existing file is complete, so it is downloaded again.
            logger.warn("File {} exists. Will overwrite with a new download!".format(filename))

    def hook(t):
        # urlretrieve reports the cumulative number of blocks transferred;
        # remember the previous count so only the delta reaches the bar.
        # A one-element list is used so the closure can mutate it.
        last_b = [0]

        def inner(b, bsize, tsize=None):
            # urlretrieve passes -1 when the server does not report a size;
            # leave the total unset in that case instead of making it negative.
            if tsize is not None and tsize >= 0:
                t.total = tsize
            t.update((b - last_b[0]) * bsize)
            last_b[0] = b
        return inner

    try:
        with tqdm.tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            fpath, _ = urllib.request.urlretrieve(url, fpath, reporthook=hook(t))
        size = os.stat(fpath).st_size
    except IOError:
        logger.error("Failed to download {}".format(url))
        raise

    # Raised explicitly (rather than via an ``assert`` statement) so the check
    # still runs under ``python -O``; the exception type is kept for callers.
    if not size > 0:
        raise AssertionError("Downloaded an empty file from {}!".format(url))

    if expect_size is not None and size != expect_size:
        # Deliberately best-effort: report the mismatch but still return the path.
        logger.error("File downloaded from {} does not match the expected size!".format(url))
        logger.error("You may have downloaded a broken file, or the upstream may have modified the file.")

    # TODO human-readable size
    logger.info('Succesfully downloaded ' + filename + ". " + str(size) + ' bytes.')
    return fpath
| 75 | |
| 76 | |
| 77 | def recursive_walk(rootdir): |
no test coverage detected