(shuffle: bool, chunk: Literal[False] | dict)
| 664 | ], |
| 665 | ) |
| 666 | def test_groupby_drops_nans(shuffle: bool, chunk: Literal[False] | dict) -> None: |
| 667 | if shuffle and chunk and not has_dask_ge_2024_08_1: |
| 668 | pytest.skip() |
| 669 | # GH2383 |
| 670 | # nan in 2D data variable (requires stacking) |
| 671 | ds = xr.Dataset( |
| 672 | { |
| 673 | "variable": (("lat", "lon", "time"), np.arange(60.0).reshape((4, 3, 5))), |
| 674 | "id": (("lat", "lon"), np.arange(12.0).reshape((4, 3))), |
| 675 | }, |
| 676 | coords={"lat": np.arange(4), "lon": np.arange(3), "time": np.arange(5)}, |
| 677 | ) |
| 678 | |
| 679 | ds["id"].values[0, 0] = np.nan |
| 680 | ds["id"].values[3, 0] = np.nan |
| 681 | ds["id"].values[-1, -1] = np.nan |
| 682 | |
| 683 | if chunk: |
| 684 | ds["variable"] = ds["variable"].chunk(chunk) |
| 685 | grouped = ds.groupby(ds.id) |
| 686 | if shuffle: |
| 687 | grouped = grouped.shuffle_to_chunks().groupby(ds.id) |
| 688 | |
| 689 | # non reduction operation |
| 690 | expected1 = ds.copy() |
| 691 | expected1.variable.data[0, 0, :] = np.nan |
| 692 | expected1.variable.data[-1, -1, :] = np.nan |
| 693 | expected1.variable.data[3, 0, :] = np.nan |
| 694 | actual1 = grouped.map(lambda x: x).transpose(*ds.variable.dims) |
| 695 | assert_identical(actual1, expected1) |
| 696 | |
| 697 | # reduction along grouped dimension |
| 698 | actual2 = grouped.mean() |
| 699 | stacked = ds.stack({"xy": ["lat", "lon"]}) |
| 700 | expected2 = ( |
| 701 | stacked.variable.where(stacked.id.notnull()) |
| 702 | .rename({"xy": "id"}) |
| 703 | .to_dataset() |
| 704 | .reset_index("id", drop=True) |
| 705 | .assign(id=stacked.id.values) |
| 706 | .dropna("id") |
| 707 | .transpose(*actual2.variable.dims) |
| 708 | ) |
| 709 | assert_identical(actual2, expected2) |
| 710 | |
| 711 | # reduction operation along a different dimension |
| 712 | actual3 = grouped.mean("time") |
| 713 | expected3 = ds.mean("time").where(ds.id.notnull()) |
| 714 | assert_identical(actual3, expected3) |
| 715 | |
| 716 | # NaN in non-dimensional coordinate |
| 717 | array = xr.DataArray([1, 2, 3], [("x", [1, 2, 3])]) |
| 718 | array["x1"] = ("x", [1, 1, np.nan]) |
| 719 | expected4 = xr.DataArray(3, [("x1", [1])]) |
| 720 | actual4 = array.groupby("x1").sum() |
| 721 | assert_equal(expected4, actual4) |
| 722 | |
| 723 | # NaT in non-dimensional coordinate |
nothing calls this directly
no test coverage detected
searching dependent graphs…