MCPcopy
hub / github.com/huggingface/datasets / run

Method run

src/datasets/commands/test.py:108–180  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

106 self._ignore_verifications = True
107
108 def run(self):
109 logging.getLogger("filelock").setLevel(ERROR)
110 if self._name is not None and self._all_configs:
111 print("Both parameters `config` and `all_configs` can't be used at once.")
112 exit(1)
113 path, config_name = self._dataset, self._name
114 module = dataset_module_factory(path)
115 builder_cls = get_dataset_builder_class(module)
116 n_builders = len(builder_cls.BUILDER_CONFIGS) if self._all_configs and builder_cls.BUILDER_CONFIGS else 1
117
def get_builders() -> Generator[DatasetBuilder, None, None]:
    """Yield the dataset builder(s) that this command has to test.

    When ``self._all_configs`` is set and the builder class declares a
    non-empty ``BUILDER_CONFIGS``, one builder is yielded per declared
    configuration. Otherwise a single builder is yielded for
    ``config_name``.

    Every builder receives ``self._cache_dir``, ``self._data_dir`` and the
    keyword arguments carried by ``module.builder_kwargs``.
    """

    def instantiate(name):
        # `module.builder_kwargs` may already carry a "config_name"; passing
        # it a second time explicitly would be a duplicate keyword argument,
        # so in that case the value from the module wins and `name` is unused.
        if "config_name" in module.builder_kwargs:
            return builder_cls(
                cache_dir=self._cache_dir,
                data_dir=self._data_dir,
                **module.builder_kwargs,
            )
        return builder_cls(
            config_name=name,
            cache_dir=self._cache_dir,
            data_dir=self._data_dir,
            **module.builder_kwargs,
        )

    if self._all_configs and builder_cls.BUILDER_CONFIGS:
        # NOTE: if the module pins "config_name", every iteration builds with
        # identical arguments; the number of yielded builders is kept equal
        # to len(BUILDER_CONFIGS) so the progress counter in the caller
        # stays consistent.
        for config in builder_cls.BUILDER_CONFIGS:
            yield instantiate(config.name)
    else:
        yield instantiate(config_name)
144
145 for j, builder in enumerate(get_builders()):
146 print(f"Testing builder '{builder.config.name}' ({j + 1}/{n_builders})")
147 builder._record_checksums = os.path.exists(
148 os.path.join(builder.get_imported_module_dir(), datasets.config.DATASETDICT_INFOS_FILENAME)
149 ) # record checksums only if we need to update a (deprecated) dataset_infos.json
150 builder.download_and_prepare(
151 download_mode=DownloadMode.REUSE_CACHE_IF_EXISTS
152 if not self._force_redownload
153 else DownloadMode.FORCE_REDOWNLOAD,
154 verification_mode=VerificationMode.NO_CHECKS
155 if self._ignore_verifications
156 else VerificationMode.ALL_CHECKS,
157 num_proc=self._num_proc,
158 )
159 builder.as_dataset()
160
161 # If save_infos=True, we create the dataset card (README.md)
162 # The dataset_infos are saved in the YAML part of the README.md
163 # This is to allow the user to upload them on HF afterwards.
164 if self._save_infos:
165 save_infos_dir = os.path.basename(path) if not os.path.isdir(path) else path

Callers 2

test_test_command (Function) · 0.95
main (Function) · 0.45

Calls 9

dataset_module_factory (Function) · 0.90
DatasetInfosDict (Class) · 0.90
exists (Method) · 0.80
as_dataset (Method) · 0.80
makedirs (Method) · 0.80
download_and_prepare (Method) · 0.45
write_to_directory (Method) · 0.45

Tested by

no test coverage detected