(split_every)
| 4017 | |
@pytest.mark.parametrize("split_every", [None, 2])
def test_split_out_drop_duplicates(split_every):
    """Compare ``drop_duplicates(split_out=10)`` on a dask frame with pandas.

    The frame has 1000 rows and three integer columns: ``x`` and ``y`` each
    hold the values 0..9 repeated 100 times, ``z`` holds 0..19 repeated 50
    times.  Every column is shuffled independently with a fixed seed, and
    the dask collection is split into 20 partitions.
    """
    col_x = np.tile(np.arange(10), 100)[:, None]
    col_y = col_x.copy()
    col_z = np.tile(np.arange(20), 50)[:, None]

    # The shuffle order (x, y, z) is significant: all three draws come from
    # the same seeded generator, so reordering would change the data.
    rng = np.random.RandomState(1)
    for column in (col_x, col_y, col_z):
        rng.shuffle(column)

    values = np.concatenate([col_x, col_y, col_z], axis=1)
    df = pd.DataFrame(values, columns=["x", "y", "z"])
    ddf = dd.from_pandas(df, npartitions=20)

    # Same iteration order as the cartesian product: subset outer, keep inner.
    for subset in (None, ["x", "z"]):
        for keep in ("first", "last"):
            expected = df.drop_duplicates(subset=subset, keep=keep)
            result = ddf.drop_duplicates(
                subset=subset,
                keep=keep,
                split_every=split_every,
                split_out=10,
            )
            # split_out fixes the number of output partitions.
            assert result.npartitions == 10
            assert_eq(expected, result)
| 4037 | |
| 4038 | |
| 4039 | @pytest.mark.parametrize("split_every", [None, 2]) |
nothing calls this directly
no test coverage detected
searching dependent graphs…