(
readengine, nfiles, parallel, chunks, file_cache_maxsize
)
| 5523 | reason="Flaky test which can cause the worker to crash (so don't xfail). Very open to contributions fixing this" |
| 5524 | ) |
def test_open_mfdataset_manyfiles(
    readengine, nfiles, parallel, chunks, file_cache_maxsize
):
    """Split a dataset into ``nfiles`` stores and reopen it with ``open_mfdataset``.

    A random 1-D variable ``foo`` of length ``nfiles`` along ``x`` is written
    as one single-element store per position (zarr via ``to_zarr``, every
    other engine via ``to_netcdf``). The stores are then combined again with
    ``open_mfdataset`` and the result must be dask-backed and identical to
    the dataset that was written.

    ``file_cache_maxsize`` is not referenced in the body; presumably it is a
    fixture requested only for its side effect -- confirm against its
    definition.
    """
    # bail out early for engine combinations that should not run
    skip_if_not_engine(readengine)

    values = np.random.randn(nfiles)
    original = Dataset({"foo": ("x", values)})

    # exercise the plain open_mfdataset path with a large number of files
    with create_tmp_files(nfiles) as tmpfiles:
        # write one store per element along "x"
        for idx in original.x.values:
            piece = original.isel(x=slice(idx, idx + 1))
            target = tmpfiles[idx]
            if readengine == "zarr":
                piece.to_zarr(store=target)
            else:
                piece.to_netcdf(target, engine=readengine)

        # A falsy ``chunks`` is forwarded unchanged only for non-zarr
        # engines; every other combination requests "auto" chunking.
        if chunks or readengine == "zarr":
            requested_chunks = "auto"
        else:
            requested_chunks = chunks

        # reopen everything as one dataset and verify the round trip
        with open_mfdataset(
            tmpfiles,
            combine="nested",
            concat_dim="x",
            engine=readengine,
            parallel=parallel,
            chunks=requested_chunks,
        ) as actual:
            # variables coming out of open_mfdataset must be dask arrays
            assert isinstance(actual["foo"].data, dask_array_type)

            assert_identical(original, actual)
| 5556 | |
| 5557 | |
| 5558 | @requires_netCDF4 |
nothing calls this directly
no test coverage detected
searching dependent graphs…