(tmpdir)
| 187 | |
| 188 | |
def test_partition_pruning(tmpdir):
    """Selecting a single value of the partitioning column prunes partitions.

    A 50-row frame whose column ``a`` cycles through 1..5 is written to
    ``tmpdir`` with ``partition_on=["a"]``. Reading it back restricted to
    ``a == 1`` must leave one fifth of the partitions of the unrestricted
    read, both when the restriction is passed through ``filters=`` and when
    it is written as a boolean-mask selection followed by ``optimize()``.
    The two restricted frames are then compared with ``assert_eq``.
    """
    with dask.config.set({"dataframe.parquet.minimum-partition-size": 1}):
        local_fs = fs.LocalFileSystem()

        pandas_frame = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5] * 10,
                "b": range(50),
            }
        )
        source = from_pandas(pandas_frame, npartitions=2)
        source.to_parquet(tmpdir, partition_on=["a"])

        # Baseline: the whole dataset, no restriction on "a".
        unrestricted = read_parquet(tmpdir, filesystem=local_fs)
        # "a" takes five distinct values, so one value is a fifth of the data.
        expected_npartitions = unrestricted.npartitions // 5

        # Route 1: restriction handed to the reader explicitly.
        via_filters = read_parquet(
            tmpdir, filters=[[("a", "==", 1)]], filesystem=local_fs
        )
        assert via_filters.npartitions == expected_npartitions

        # Route 2: restriction expressed as a selection, then optimized.
        reread = read_parquet(tmpdir, filesystem=local_fs)
        keep_a_equals_one = reread.a == 1
        via_optimizer = reread[keep_a_equals_one].optimize()
        assert via_optimizer.npartitions == expected_npartitions

        # NOTE(review): the name check is switched off here and was flagged
        # "FIXME ?" by the original author; the reason is not visible in this
        # test -- confirm whether the names are expected to differ.
        assert_eq(via_filters, via_optimizer, check_names=False)
| 217 | |
| 218 | |
| 219 | def test_predicate_pushdown(tmpdir): |
nothing calls this directly
no test coverage detected
searching dependent graphs…