MCPcopy
hub / github.com/huggingface/datasets / xgetsize

Function xgetsize

src/datasets/utils/file_utils.py:749–774  ·  view source on GitHub ↗

Extend the `os.path.getsize` function to support remote files. Args: path (`str`): URL path. download_config: mainly used to pass a token or storage_options, to support different platforms and auth types. Returns: `int`: the size of the file.

(path, download_config: Optional[DownloadConfig] = None)

Source from the content-addressed store, hash-verified

747
748
def xgetsize(path, download_config: Optional[DownloadConfig] = None) -> int:
    """Extend `os.path.getsize` function to support remote files.

    Args:
        path (`str`): URL path. Chained URLs use `::` to separate hops; only the
            first hop is used to decide whether the path is local and to query the size.
        download_config : mainly used to pass a token or storage_options, to support
            different platforms and auth types.

    Returns:
        `int`: Size of the file. For local paths this is the result of
        `os.path.getsize`; for remote paths it is the value reported by the
        filesystem's `size()`, or, when that is `None`, the length of the
        content read through `xopen`.

    Raises:
        FileNotFoundError: If the remote filesystem raises
            `huggingface_hub.utils.EntryNotFoundError` for the path.
    """
    first_hop = str(path).split("::")[0]
    if is_local_path(first_hop):
        return os.path.getsize(path)

    path, storage_options = _prepare_path_and_storage_options(path, download_config=download_config)
    # The prepared path may differ from the input, so recompute the first hop.
    main_hop = path.split("::")[0]
    fs, *_ = url_to_fs(path, **storage_options)
    try:
        size = fs.size(main_hop)
    except huggingface_hub.utils.EntryNotFoundError as err:
        # Keep the original error as the explicit cause for easier debugging.
        raise FileNotFoundError(f"No such file: {path}") from err
    if size is None:
        # use xopen instead of fs.open to make data fetching more robust
        # NOTE(review): this reads the whole file; if xopen defaults to text mode
        # the result counts characters rather than bytes — confirm against xopen.
        with xopen(path, download_config=download_config) as f:
            size = len(f.read())
    return size
775
776
777def xisdir(path, download_config: Optional[DownloadConfig] = None) -> bool:

Callers 3

test_xgetsizeFunction · 0.90
test_xgetsize_privateFunction · 0.90
extra_nbytes_visitorMethod · 0.85

Calls 5

is_local_pathFunction · 0.85
xopenFunction · 0.85
splitMethod · 0.80
readMethod · 0.45

Tested by 2

test_xgetsizeFunction · 0.72
test_xgetsize_privateFunction · 0.72