MCPcopy
hub / github.com/huggingface/datasets / get_module

Method get_module

src/datasets/load.py:816–841  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

814 assert self.name.count("/") <= 1
815
def get_module(self) -> DatasetModule:
    """Return a ``DatasetModule`` that loads the dataset from the local cache.

    The cache root is ``self.cache_dir`` when set, otherwise
    ``config.HF_DATASETS_CACHE``, with ``~`` expanded. Inside it, the dataset
    is looked up under ``<namespace>___<dataset_name>``, where only the last
    component of ``self.name`` is passed through ``camelcase_to_snakecase``.
    The dataset counts as cached when at least one directory exists exactly
    three levels below that folder.

    Returns:
        A ``DatasetModule`` targeting ``datasets.packaged_modules.cache.cache``
        whose builder kwargs carry ``repo_id``, ``dataset_name`` and
        ``"version": "auto"``.

    Raises:
        FileNotFoundError: If no cached directory is found for ``self.name``.
    """
    cache_dir = os.path.expanduser(str(self.cache_dir or config.HF_DATASETS_CACHE))

    name_parts = self.name.split("/")
    name_parts[-1] = camelcase_to_snakecase(name_parts[-1])
    dataset_cache_root = os.path.join(cache_dir, "___".join(name_parts))

    # Escape the root so glob metacharacters ("[", "?", "*") that happen to be
    # part of the cache path are matched literally instead of as a pattern.
    # NOTE(review): the three wildcard levels are presumably
    # config / version / hash -- confirm against the cache builder.
    pattern = os.path.join(glob.escape(dataset_cache_root), "*", "*", "*")
    if not any(os.path.isdir(candidate) for candidate in glob.glob(pattern)):
        # Report the directory that was actually searched; ``self.cache_dir``
        # is None whenever the default cache location is in use.
        raise FileNotFoundError(f"Dataset {self.name} is not cached in {cache_dir}")

    warning_msg = f"Using the latest cached version of the dataset since {self.name} couldn't be found on the Hugging Face Hub"
    if config.HF_HUB_OFFLINE:
        warning_msg += " (offline mode is enabled)."
    logger.warning(warning_msg)

    builder_kwargs = {
        "repo_id": self.name,
        "dataset_name": self.name.split("/")[-1],
        "version": "auto",
    }
    return DatasetModule(
        "datasets.packaged_modules.cache.cache",
        "auto",
        builder_kwargs,
    )
842
843
844class HubBucketDatasetModuleFactory(_DatasetModuleFactory):

Callers 1

Calls 4

camelcase_to_snakecaseFunction · 0.85
DatasetModuleClass · 0.85
splitMethod · 0.80
globMethod · 0.80

Tested by 1