| 814 | assert self.name.count("/") <= 1 |
| 815 | |
def get_module(self) -> DatasetModule:
    """Return a dataset module that points at a locally cached copy of the dataset.

    The cache root is ``self.cache_dir`` when set, otherwise
    ``config.HF_DATASETS_CACHE``, with ``~`` expanded. Inside it, the dataset
    directory name is the ``"/"``-separated parts of ``self.name`` joined with
    ``"___"``, the last part being converted with ``camelcase_to_snakecase``.
    The dataset counts as cached when at least one directory exists exactly
    three levels below that dataset directory.

    Returns:
        A ``DatasetModule`` targeting ``datasets.packaged_modules.cache.cache``
        whose builder kwargs carry ``repo_id``, ``dataset_name`` and
        ``version="auto"``.

    Raises:
        FileNotFoundError: If no cached directory is found for the dataset.
    """
    cache_dir = os.path.expanduser(str(self.cache_dir or config.HF_DATASETS_CACHE))
    namespace_and_dataset_name = self.name.split("/")
    namespace_and_dataset_name[-1] = camelcase_to_snakecase(namespace_and_dataset_name[-1])
    cached_relative_path = "___".join(namespace_and_dataset_name)
    cached_datasets_directory_path_root = os.path.join(cache_dir, cached_relative_path)

    # Escape the fixed part of the path: a cache directory containing glob
    # metacharacters (e.g. "[" or "*") must be matched literally, otherwise an
    # existing cached copy would not be found. Only the three trailing
    # components are wildcards.
    # NOTE(review): the three levels presumably correspond to nested
    # per-build sub-directories of the cache layout -- confirm against the writer.
    cached_directories_pattern = os.path.join(
        glob.escape(cached_datasets_directory_path_root), "*", "*", "*"
    )
    cached_directory_paths = [
        cached_directory_path
        for cached_directory_path in glob.glob(cached_directories_pattern)
        if os.path.isdir(cached_directory_path)
    ]
    if not cached_directory_paths:
        raise FileNotFoundError(f"Dataset {self.name} is not cached in {self.cache_dir}")

    builder_kwargs = {
        "repo_id": self.name,
        "dataset_name": self.name.split("/")[-1],
    }
    warning_msg = f"Using the latest cached version of the dataset since {self.name} couldn't be found on the Hugging Face Hub"
    if config.HF_HUB_OFFLINE:
        warning_msg += " (offline mode is enabled)."
    logger.warning(warning_msg)
    return DatasetModule(
        "datasets.packaged_modules.cache.cache",
        "auto",
        {**builder_kwargs, "version": "auto"},
    )
| 842 | |
| 843 | |
| 844 | class HubBucketDatasetModuleFactory(_DatasetModuleFactory): |