MCPcopy Index your code
hub / github.com/huggingface/datasets / CachedDatasetModuleFactory

Class CachedDatasetModuleFactory

src/datasets/load.py:802–841  ·  view source on GitHub ↗

Get the module of a dataset that has been loaded once already and cached.

Source from the content-addressed store, hash-verified

800
801
class CachedDatasetModuleFactory(_DatasetModuleFactory):
    """
    Get the module of a dataset that has been loaded once already and cached.

    Args:
        name: Dataset identifier, either ``"dataset"`` or
            ``"namespace/dataset"`` (at most one ``/`` is allowed).
        cache_dir: Directory in which to look for the cached dataset. When it
            is falsy, ``config.HF_DATASETS_CACHE`` is used by ``get_module``.
    """

    def __init__(
        self,
        name: str,
        cache_dir: Optional[str] = None,
    ):
        self.name = name
        self.cache_dir = cache_dir
        # NOTE: ``assert`` statements are stripped when Python runs with -O;
        # kept as-is so the exception type seen by callers does not change.
        assert self.name.count("/") <= 1

    def get_module(self) -> DatasetModule:
        """
        Build the ``DatasetModule`` that points at the packaged cache builder.

        The cache directory for the dataset is derived from ``self.name``: the
        last path component is converted with ``camelcase_to_snakecase`` and
        the components are joined with ``"___"``. The dataset counts as cached
        when at least one directory exists three levels below that root.

        Returns:
            A ``DatasetModule`` built from the
            ``datasets.packaged_modules.cache.cache`` module path, ``"auto"``,
            and builder kwargs carrying ``repo_id``, ``dataset_name`` and
            ``version="auto"``.

        Raises:
            FileNotFoundError: If no matching cached directory is found.
        """
        cache_dir = os.path.expanduser(str(self.cache_dir or config.HF_DATASETS_CACHE))
        name_parts = self.name.split("/")
        name_parts[-1] = camelcase_to_snakecase(name_parts[-1])
        dataset_cache_root = os.path.join(cache_dir, "___".join(name_parts))

        # Three wildcard levels below the dataset root
        # (presumably config / version / hash -- TODO confirm against the cache layout).
        is_cached = any(
            os.path.isdir(candidate)
            for candidate in glob.glob(os.path.join(dataset_cache_root, "*", "*", "*"))
        )
        if not is_cached:
            # Report the directory that was actually searched; ``self.cache_dir``
            # is ``None`` whenever the default cache location is in use.
            raise FileNotFoundError(f"Dataset {self.name} is not cached in {cache_dir}")

        builder_kwargs = {
            "repo_id": self.name,
            "dataset_name": self.name.split("/")[-1],
        }
        warning_msg = f"Using the latest cached version of the dataset since {self.name} couldn't be found on the Hugging Face Hub"
        if config.HF_HUB_OFFLINE:
            warning_msg += " (offline mode is enabled)."
        logger.warning(warning_msg)
        return DatasetModule(
            "datasets.packaged_modules.cache.cache",
            "auto",
            {**builder_kwargs, "version": "auto"},
        )
842
843
844class HubBucketDatasetModuleFactory(_DatasetModuleFactory):

Callers 2

dataset_module_factory · Function · 0.85

Calls

no outgoing calls

Tested by 1