@pytest.mark.slow
@pytest.mark.parametrize("dataset", REGISTRY)
def test_registry_accessibility(dataset):
    """Check that an active Hugging Face registry entry can be opened.

    Entries whose ``source`` does not contain "hugging" (case-insensitive)
    or whose ``is_active`` value is falsy are skipped. An eligible entry
    without a ``dataset_name`` fails the test.

    Args:
        dataset: One entry of ``REGISTRY``; read through ``.get`` for the
            keys ``source``, ``is_active`` and ``dataset_name``.
    """
    # `or ""` also covers an entry that stores an explicit None for "source",
    # which the `.get` default alone would not.
    source = dataset.get("source") or ""

    # pytest.skip() raises, so no `return` is needed in front of it.
    if "hugging" not in source.lower():
        pytest.skip("skipped dataset")
    if not dataset.get("is_active"):
        pytest.skip("skipped dataset")

    dataset_name = dataset.get("dataset_name")
    if not dataset_name:
        pytest.fail(f"No dataset_name found in {dataset}")

    # Open the dataset lazily so the check does not download the full data.
    # NOTE(review): assumes `load_dataset` is Hugging Face
    # `datasets.load_dataset`, which accepts `streaming=True` -- confirm
    # against this module's imports.
    try:
        ds = load_dataset(dataset_name, split=None, streaming=True)
    except Exception as e:
        pytest.fail(f"Error loading metadata for {dataset_name}: {e}")

    # Kept outside the try so an assertion failure is reported as itself
    # rather than being re-wrapped as a loading error.
    assert ds is not None, f"Failed to load metadata for {dataset_name}"