Test that remote URLs are correctly selected by the backend resolution system. This tests the fix for an issue where the netCDF4, h5netcdf, and pydap backends were claiming ALL remote URLs, preventing remote Zarr stores from being auto-detected. See: https://github.com/pydata/xarray/issues/10801
()
| 7559 | @requires_netCDF4 |
| 7560 | @requires_zarr |
| 7561 | def test_remote_url_backend_auto_detection() -> None: |
| 7562 | """ |
| 7563 | Test that remote URLs are correctly selected by the backend resolution system. |
| 7564 | |
| 7565 | This tests the fix for issue where netCDF4, h5netcdf, and pydap backends were |
| 7566 | claiming ALL remote URLs, preventing remote Zarr stores from being |
| 7567 | auto-detected. |
| 7568 | |
| 7569 | See: https://github.com/pydata/xarray/issues/10801 |
| 7570 | """ |
| 7571 | from xarray.backends.plugins import guess_engine |
| 7572 | |
| 7573 | # Test cases: (url, expected_backend) |
| 7574 | test_cases = [ |
| 7575 | # Remote Zarr URLs |
| 7576 | ("https://example.com/store.zarr", "zarr"), |
| 7577 | ("http://example.com/data.zarr/", "zarr"), |
| 7578 | ("s3://bucket/path/to/data.zarr", "zarr"), |
| 7579 | # Remote netCDF URLs (non-DAP) - netcdf4 wins (first in order, no query params) |
| 7580 | ("https://example.com/file.nc", "netcdf4"), |
| 7581 | ("http://example.com/data.nc4", "netcdf4"), |
| 7582 | ("https://example.com/test.cdf", "netcdf4"), |
| 7583 | ("s3://bucket/path/to/data.nc", "netcdf4"), |
| 7584 | # Remote netCDF URLs with query params - netcdf4 wins |
| 7585 | # Note: Query params are typically indicative of DAP URLs (e.g., OPeNDAP constraint expressions), |
| 7586 | # so we prefer netcdf4 (which has DAP support) over h5netcdf (which doesn't) |
| 7587 | ("https://example.com/data.nc?var=temperature&time=0", "netcdf4"), |
| 7588 | ( |
| 7589 | "http://test.opendap.org/opendap/dap4/StaggeredGrid.nc4?dap4.ce=/time[0:1:0]", |
| 7590 | "netcdf4", |
| 7591 | ), |
| 7592 | # DAP URLs with .nc extensions (no query params) - netcdf4 wins (first in order) |
| 7593 | ("http://test.opendap.org/opendap/dap4/StaggeredGrid.nc4", "netcdf4"), |
| 7594 | ("https://example.com/DAP4/data.nc", "netcdf4"), |
| 7595 | ("http://example.com/data/Dap4/file.nc", "netcdf4"), |
| 7596 | ] |
| 7597 | |
| 7598 | for url, expected_backend in test_cases: |
| 7599 | engine = guess_engine(url) |
| 7600 | assert engine == expected_backend, ( |
| 7601 | f"URL {url!r} should select {expected_backend!r} but got {engine!r}" |
| 7602 | ) |
| 7603 | |
| 7604 | # DAP URLs - netcdf4 should handle these (it comes first in backend order) |
| 7605 | # Both netcdf4 and pydap can open DAP URLs, but netcdf4 has priority |
| 7606 | expected_dap_backend = "netcdf4" |
| 7607 | dap_urls = [ |
| 7608 | # Explicit DAP protocol schemes |
| 7609 | "dap2://opendap.earthdata.nasa.gov/collections/dataset", |
| 7610 | "dap4://opendap.earthdata.nasa.gov/collections/dataset", |
| 7611 | "dap://example.com/dataset", |
| 7612 | "DAP2://example.com/dataset", # uppercase scheme |
| 7613 | "DAP4://example.com/dataset", # uppercase scheme |
| 7614 | # DAP path indicators |
| 7615 | "https://example.com/services/DAP2/dataset", # uppercase in path |
| 7616 | "http://test.opendap.org/opendap/data/nc/file.nc", # /opendap/ path |
| 7617 | "https://coastwatch.pfeg.noaa.gov/erddap/griddap/erdMH1chla8day", # ERDDAP |
| 7618 | "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/", # THREDDS dodsC |
nothing calls this directly
no test coverage detected
searching dependent graphs…