MCPcopy
hub / github.com/huggingface/datasets / glob

Method glob

src/datasets/utils/file_utils.py:1116–1142  ·  view source on GitHub ↗

Glob function for argument of type :obj:`~pathlib.Path` that supports both local paths and remote URLs. Args: pattern (`str`): Pattern that resulting paths must match. download_config: mainly used to pass a token or storage_options to support different platforms and auth types.

(self, pattern, download_config: Optional[DownloadConfig] = None)

Source from the content-addressed store, hash-verified

1114 return xexists(str(self), download_config=download_config)
1115
def glob(self, pattern, download_config: Optional[DownloadConfig] = None):
    """Glob function for argument of type :obj:`~pathlib.Path` that supports both local paths and remote URLs.

    The path may be a chained URL whose hops are separated by ``"::"``. Only
    the first hop is globbed; the remaining hops are re-appended unchanged to
    every remote result.

    Args:
        pattern (`str`): Pattern that resulting paths must match.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Yields:
        [`xPath`] for remote paths. For a local first hop, the results of
        ``Path(main_hop).glob(pattern)`` are yielded as-is.
    """
    posix_path = self.as_posix()
    main_hop, *rest_hops = posix_path.split("::")
    if is_local_path(main_hop):
        yield from Path(main_hop).glob(pattern)
        return

    # globbing inside a zip in a private repo requires authentication
    if rest_hops:
        urlpath = rest_hops[0]
        urlpath, storage_options = _prepare_path_and_storage_options(urlpath, download_config=download_config)
        # Key the options by the protocol of the second hop so they are
        # routed to that hop's filesystem when the chain is resolved.
        storage_options = {urlpath.split("://")[0]: storage_options}
        posix_path = "::".join([main_hop, urlpath, *rest_hops[1:]])
    else:
        storage_options = None
    fs, *_ = url_to_fs(xjoin(posix_path, pattern), **(storage_options or {}))

    # fsspec filesystems may declare `protocol` as a tuple of aliases;
    # formatting the tuple directly would produce an invalid URL scheme
    # such as "('s3', 's3a')://...". Normalise it to a single string.
    protocol = fs.protocol if isinstance(fs.protocol, str) else fs.protocol[-1]

    for globbed_path in fs.glob(xjoin(main_hop, pattern)):
        yield type(self)("::".join([f"{protocol}://{globbed_path}"] + rest_hops))
1143
1144 def rglob(self, pattern, **kwargs):
1145 """Rglob function for argument of type :obj:`~pathlib.Path` that supports both local paths end remote URLs.

Callers 15

rglobMethod · 0.95
_push_to_repoFunction · 0.80
_push_to_bucketFunction · 0.80
resolve_patternFunction · 0.80
_push_to_repoFunction · 0.80
_push_to_bucketFunction · 0.80
get_moduleMethod · 0.80
xglobFunction · 0.80
_find_hash_in_cacheFunction · 0.80

Calls 4

is_local_pathFunction · 0.85
xjoinFunction · 0.85
splitMethod · 0.80

Tested by 6

test_xpath_globMethod · 0.64
pattern_resultsFunction · 0.64
resolverFunction · 0.64