MCPcopy
hub / github.com/treeverse/dvc / download

Function download

dvc/fs/__init__.py:49–88  ·  view source on GitHub ↗
(
    fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
)

Source from the content-addressed store, hash-verified

47
48
def download(
    fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
) -> list[tuple[str, str, Optional[dict]]]:
    """Download ``fs_path`` from ``fs`` to the local path ``to``.

    Progress is reported through a ``TqdmCallback`` whose unit is files.

    Args:
        fs: Source filesystem. ``DVCFileSystem`` instances get an LFS
            prefetch and, for paths without glob magic characters, are
            served by ``fs._get`` directly.
        fs_path: Path on ``fs`` to download; may be a file or a directory.
        to: Destination path on the local filesystem.
        jobs: Passed as ``batch_size`` to the copy. On the ``generic.copy``
            path a falsy value falls back to ``fs.jobs``.

    Returns:
        For the ``DVCFileSystem`` shortcut, whatever ``fs._get`` returns.
        Otherwise a list of ``(source_path, destination_path, None)``
        tuples, one per copied file; an empty list when ``fs_path`` is a
        directory in which no files were found (``to`` is still created).
    """
    from dvc.scm import lfs_prefetch

    from .callbacks import TqdmCallback

    with TqdmCallback(desc=f"Downloading {fs.name(fs_path)}", unit="files") as cb:
        if isinstance(fs, DVCFileSystem):
            # The path is glob-escaped so it is matched literally; for a
            # directory the pattern is extended with "/**".
            lfs_prefetch(
                fs,
                [
                    f"{fs.normpath(glob.escape(fs_path))}/**"
                    if fs.isdir(fs_path)
                    else glob.escape(fs_path)
                ],
            )
            # Paths without glob magic characters are handled by the
            # filesystem's own _get; others fall through to generic.copy.
            if not glob.has_magic(fs_path):
                return fs._get(fs_path, to, batch_size=jobs, callback=cb)

        # NOTE: We use dvc-objects generic.copy over fs.get since it makes file
        # download atomic and avoids fsspec glob/regex path expansion.
        if fs.isdir(fs_path):
            # Skip entries ending with the path separator -- presumably
            # directory placeholders rather than files (TODO confirm).
            from_infos = [
                path for path in fs.find(fs_path) if not path.endswith(fs.flavour.sep)
            ]
            if not from_infos:
                # Nothing to copy: still create the destination directory.
                localfs.makedirs(to, exist_ok=True)
                return []
            # Mirror each source path's position relative to fs_path under `to`.
            to_infos = [
                localfs.join(to, *fs.relparts(info, fs_path)) for info in from_infos
            ]
        else:
            from_infos = [fs_path]
            to_infos = [to]

        cb.set_size(len(from_infos))
        jobs = jobs or fs.jobs
        generic.copy(fs, from_infos, localfs, to_infos, callback=cb, batch_size=jobs)
        # The third tuple element is always None on this path.
        return list(zip(from_infos, to_infos, repeat(None)))
89
90
91def parse_external_url(url, fs_config=None, config=None):

Callers 4

getFunction · 0.90
get_urlFunction · 0.90
test_lfs_prefetch_fileFunction · 0.90

Calls 10

lfs_prefetchFunction · 0.90
TqdmCallbackClass · 0.85
nameMethod · 0.80
normpathMethod · 0.80
isdirMethod · 0.80
joinMethod · 0.80
relpartsMethod · 0.80
set_sizeMethod · 0.80
_getMethod · 0.45
findMethod · 0.45

Tested by 2

test_lfs_prefetch_fileFunction · 0.72