MCPcopy
hub / github.com/huggingface/datasets / cached_path

Function cached_path

src/datasets/utils/file_utils.py:134–254  ·  view source on GitHub ↗

Given something that might be a URL (or might be a local path), determine which. If it's a URL, download the file and cache it, and return the path to the cached file. If it's already a local path, make sure the file exists and then return the path. Return: Local path (string).

(
    url_or_filename,
    download_config=None,
    **download_kwargs,
)

Source from the content-addressed store, hash-verified

132
133
134def cached_path(
135 url_or_filename,
136 download_config=None,
137 **download_kwargs,
138) -> str:
139 """
140 Given something that might be a URL (or might be a local path),
141 determine which. If it's a URL, download the file and cache it, and
142 return the path to the cached file. If it's already a local path,
143 make sure the file exists and then return the path.
144
145 Return:
146 Local path (string)
147
148 Raises:
149 FileNotFoundError: in case of non-recoverable file
150 (non-existent or no cache on disk)
151 ConnectionError: in case of unreachable url
152 and no cache on disk
153 ValueError: if it couldn't parse the url or filename correctly
154 httpx.NetworkError or requests.exceptions.ConnectionError: in case of internet connection issue
155 """
156 if download_config is None:
157 download_config = DownloadConfig(**download_kwargs)
158
159 cache_dir = download_config.cache_dir or config.DOWNLOADED_DATASETS_PATH
160 if isinstance(cache_dir, Path):
161 cache_dir = str(cache_dir)
162 if isinstance(url_or_filename, Path):
163 url_or_filename = str(url_or_filename)
164
165 # Convert fsspec URL in the format "file://local/path" to "local/path"
166 if can_be_local(url_or_filename):
167 url_or_filename = strip_protocol(url_or_filename)
168
169 if is_remote_url(url_or_filename):
170 # URL, so get it from the cache (downloading if necessary)
171 url_or_filename, storage_options = _prepare_path_and_storage_options(
172 url_or_filename, download_config=download_config
173 )
174 # Download files from Hugging Face.
175 # Note: no need to check for https://huggingface.co file URLs since _prepare_path_and_storage_options
176 # prepares Hugging Face HTTP URLs as hf:// paths already
177 if url_or_filename.startswith("hf://") and not url_or_filename.startswith("hf://buckets/"):
178 resolved_path = huggingface_hub.HfFileSystem(
179 endpoint=config.HF_ENDPOINT, token=download_config.token
180 ).resolve_path(url_or_filename)
181 try:
182 output_path = huggingface_hub.HfApi(
183 endpoint=config.HF_ENDPOINT,
184 token=download_config.token,
185 library_name="datasets",
186 library_version=__version__,
187 user_agent=get_datasets_user_agent(download_config.user_agent),
188 ).hf_hub_download(
189 repo_id=resolved_path.repo_id,
190 repo_type=resolved_path.repo_type,
191 revision=resolved_path.revision,

Calls 13

DownloadConfig · Class · 0.85
is_remote_url · Function · 0.85
get_datasets_user_agent · Function · 0.85
get_from_cache · Function · 0.85
is_local_path · Function · 0.85
_get_extraction_protocol · Function · 0.85
_get_path_extension · Function · 0.85
ExtractManager · Class · 0.85
exists · Method · 0.80
split · Method · 0.80