| 696 | |
| 697 | |
def test_dropna():
    """Check that dask ``dropna`` gives the same result as pandas ``dropna``.

    Covers Series-level ``dropna`` on each column, DataFrame-level ``dropna``
    with ``how`` / ``subset`` / ``thresh`` arguments, the error raised when
    ``how`` and ``thresh`` are passed together, and a regression case.
    """
    pdf = pd.DataFrame(
        {
            "x": [np.nan, 2, 3, 4, np.nan, 6],
            "y": [1, 2, np.nan, 4, np.nan, np.nan],
            "z": [1, 2, 3, 4, np.nan, 6],
        },
        index=[10, 20, 30, 40, 50, 60],
    )
    ddf = dd.from_pandas(pdf, 3)

    # Series.dropna on every column; getattr keeps the attribute-style
    # column access (``ddf.x``) rather than switching to item access.
    for column in ("x", "y", "z"):
        assert_eq(getattr(ddf, column).dropna(), getattr(pdf, column).dropna())

    # DataFrame.dropna with the default arguments and with how / subset.
    how_subset_cases = [
        {},
        {"how": "all"},
        {"subset": ["x"]},
        {"subset": ["y", "z"]},
        {"subset": ["y", "z"], "how": "all"},
    ]
    for kwargs in how_subset_cases:
        assert_eq(ddf.dropna(**kwargs), pdf.dropna(**kwargs))

    # threshold
    for threshold in (None, 0, 1, 2, 3):
        assert_eq(ddf.dropna(thresh=threshold), pdf.dropna(thresh=threshold))

    # fail when how and thresh are both provided
    # see https://github.com/dask/dask/issues/9365
    expected_error = "cannot set both the how and thresh arguments"
    with pytest.raises(TypeError, match=expected_error):
        ddf.dropna(how="all", thresh=0)

    # Regression test for https://github.com/dask/dask/issues/6540
    regression_pdf = pd.DataFrame({"_0": [0, 0, np.nan], "_1": [1, 2, 3]})
    regression_ddf = dd.from_pandas(regression_pdf, npartitions=2)
    assert_eq(
        regression_ddf.dropna(subset=["_0"]),
        regression_pdf.dropna(subset=["_0"]),
    )
| 738 | |
| 739 | |
| 740 | @pytest.mark.parametrize("lower, upper", [(2, 5), (2.5, 3.5)]) |