(engine)
| 2124 | "engine", ["pandas", pytest.param("cudf", marks=pytest.mark.gpu)] |
| 2125 | ) |
def test_groupby_concat_cudf(engine):
    """Reproducer for issue #5643: column-wise concat of two groupby sums.

    Two small random frames are grouped on their key column ("a" and "c"
    respectively) and summed, then concatenated along ``axis=1``. The lazy
    result from ``dd.concat`` must match the eagerly computed one after
    both are sorted by index.

    ``engine`` selects the backend: ``"pandas"`` uses ``dd.from_pandas``;
    ``"cudf"`` converts the frames with ``cudf.from_pandas`` and builds the
    collections with ``dask_cudf.from_cudf``.
    """
    n_rows = 6
    npartitions = 3

    def _random_frame(key_col, value_col):
        # Key column is a permutation of 0..n_rows-1 (so keys are unique);
        # the value column holds random integers below 100.
        keys = np.random.permutation(np.arange(n_rows))
        values = np.random.randint(100, size=n_rows)
        return pd.DataFrame({key_col: keys, value_col: values})

    left = _random_frame("a", "b")
    right = _random_frame("c", "d")

    if engine == "cudf":
        # NOTE: engine == "cudf" requires cudf/dask_cudf,
        # will be skipped by non-GPU CI.
        cudf = pytest.importorskip("cudf")
        dask_cudf = pytest.importorskip("dask_cudf")

        left = cudf.from_pandas(left)
        right = cudf.from_pandas(right)
        to_collection = dask_cudf.from_cudf
    else:
        to_collection = dd.from_pandas

    lazy_left = to_collection(left, npartitions)
    lazy_right = to_collection(right, npartitions)

    # Eager reference result on the in-memory frames.
    eager_sums = [
        left.groupby(["a"]).sum(),
        right.groupby(["c"]).sum(),
    ]
    expected = concat(eager_sums, axis=1)

    # Same pipeline on the partitioned collections.
    lazy_sums = [
        lazy_left.groupby(["a"]).sum(),
        lazy_right.groupby(["c"]).sum(),
    ]
    with warnings.catch_warnings():
        # NOTE(review): presumably silences a divisions-related UserWarning
        # from dd.concat(axis=1) -- confirm against dd.concat.
        warnings.simplefilter("ignore", UserWarning)
        result = dd.concat(lazy_sums, axis=1)

    assert_eq(result.compute().sort_index(), expected.sort_index())
| 2170 | |
| 2171 | |
| 2172 | @pytest.mark.parametrize("ordered", [True, False]) |
nothing calls this directly
no test coverage detected
searching dependent graphs…