(self)
| 106 | self._ignore_verifications = True |
| 107 | |
| 108 | def run(self): |
| 109 | logging.getLogger("filelock").setLevel(ERROR) |
| 110 | if self._name is not None and self._all_configs: |
| 111 | print("Both parameters `config` and `all_configs` can't be used at once.") |
| 112 | exit(1) |
| 113 | path, config_name = self._dataset, self._name |
| 114 | module = dataset_module_factory(path) |
| 115 | builder_cls = get_dataset_builder_class(module) |
| 116 | n_builders = len(builder_cls.BUILDER_CONFIGS) if self._all_configs and builder_cls.BUILDER_CONFIGS else 1 |
| 117 | |
def get_builders() -> "Generator[DatasetBuilder, None, None]":
    """Yield the dataset builder(s) that the surrounding command should test.

    Reads ``self``, ``module``, ``builder_cls`` and ``config_name`` from the
    enclosing scope.

    * If ``self._all_configs`` is set and ``builder_cls.BUILDER_CONFIGS`` is
      non-empty, one builder is yielded per entry of ``BUILDER_CONFIGS``.
    * Otherwise a single builder is yielded for ``config_name``.

    In both cases, when ``module.builder_kwargs`` already contains a
    ``"config_name"`` key, that value is used and no explicit ``config_name``
    is passed (passing it twice would raise ``TypeError``).

    NOTE(review): with all-configs mode *and* a ``"config_name"`` already in
    ``module.builder_kwargs``, every iteration builds with the same arguments,
    so the same configuration is yielded once per ``BUILDER_CONFIGS`` entry.
    This mirrors the previous behaviour; confirm whether it is intended.

    Yields:
        Instances created by calling ``builder_cls`` with ``cache_dir``,
        ``data_dir``, the module's builder kwargs and, when applicable,
        ``config_name``.
    """
    # The return annotation is quoted so it is not evaluated when the
    # function object is created.

    def build(name):
        # Only pass config_name explicitly when the module kwargs do not
        # already provide one; a duplicated keyword is a TypeError.
        extra = {} if "config_name" in module.builder_kwargs else {"config_name": name}
        return builder_cls(
            **extra,
            cache_dir=self._cache_dir,
            data_dir=self._data_dir,
            **module.builder_kwargs,
        )

    if self._all_configs and builder_cls.BUILDER_CONFIGS:
        for config in builder_cls.BUILDER_CONFIGS:
            yield build(config.name)
    else:
        yield build(config_name)
| 144 | |
| 145 | for j, builder in enumerate(get_builders()): |
| 146 | print(f"Testing builder '{builder.config.name}' ({j + 1}/{n_builders})") |
| 147 | builder._record_checksums = os.path.exists( |
| 148 | os.path.join(builder.get_imported_module_dir(), datasets.config.DATASETDICT_INFOS_FILENAME) |
| 149 | ) # record checksums only if we need to update a (deprecated) dataset_infos.json |
| 150 | builder.download_and_prepare( |
| 151 | download_mode=DownloadMode.REUSE_CACHE_IF_EXISTS |
| 152 | if not self._force_redownload |
| 153 | else DownloadMode.FORCE_REDOWNLOAD, |
| 154 | verification_mode=VerificationMode.NO_CHECKS |
| 155 | if self._ignore_verifications |
| 156 | else VerificationMode.ALL_CHECKS, |
| 157 | num_proc=self._num_proc, |
| 158 | ) |
| 159 | builder.as_dataset() |
| 160 | |
| 161 | # If save_infos=True, we create the dataset card (README.md) |
| 162 | # The dataset_infos are saved in the YAML part of the README.md |
| 163 | # This is to allow the user to upload them on HF afterwards. |
| 164 | if self._save_infos: |
| 165 | save_infos_dir = os.path.basename(path) if not os.path.isdir(path) else path |
no test coverage detected