(tmpdir, meta, label, allow_projection, enforce_metadata)
| 296 | @pytest.mark.parametrize("allow_projection", [True, False]) |
| 297 | @pytest.mark.parametrize("enforce_metadata", [True, False]) |
| 298 | def test_from_map(tmpdir, meta, label, allow_projection, enforce_metadata): |
| 299 | pdf = pd.DataFrame({c: range(10) for c in "abcdefghijklmn"}) |
| 300 | dd.from_pandas(pdf, 3).to_parquet(tmpdir, write_index=False) |
| 301 | files = sorted(glob.glob(f"{tmpdir}/*.parquet")) |
| 302 | if allow_projection: |
| 303 | func = pd.read_parquet |
| 304 | else: |
| 305 | func = lambda *args, **kwargs: pd.read_parquet(*args, **kwargs) |
| 306 | options = { |
| 307 | "enforce_metadata": enforce_metadata, |
| 308 | "label": label, |
| 309 | } |
| 310 | if meta: |
| 311 | options["meta"] = pdf.iloc[:0] |
| 312 | |
| 313 | df = from_map(func, files, **options) |
| 314 | assert_eq(df, pdf, check_index=False) |
| 315 | assert_eq(df["a"], pdf["a"], check_index=False) |
| 316 | assert_eq(df[["a"]], pdf[["a"]], check_index=False) |
| 317 | assert_eq(df[["a", "b"]], pdf[["a", "b"]], check_index=False) |
| 318 | assert all( |
| 319 | tsk.data_producer |
| 320 | for key, tsk in df.__dask_graph__().items() |
| 321 | if not key[0].startswith("_to_string") |
| 322 | ) |
| 323 | if allow_projection: |
| 324 | assert all( |
| 325 | tsk.data_producer |
| 326 | for key, tsk in df[["a"]].optimize(fuse=False).__dask_graph__().items() |
| 327 | if not key[0].startswith("_to_string") |
| 328 | ) |
| 329 | |
| 330 | if label: |
| 331 | if pyarrow_strings_enabled(): |
| 332 | assert df.expr.frame._name.startswith(label) |
| 333 | else: |
| 334 | assert df.expr._name.startswith(label) |
| 335 | |
| 336 | if allow_projection: |
| 337 | got = df[["a", "b"]].optimize(fuse=False) |
| 338 | if pyarrow_strings_enabled(): |
| 339 | assert isinstance(got.expr.frame, FromMap) |
| 340 | assert got.expr.frame.operand("columns") == ["a", "b"] |
| 341 | else: |
| 342 | assert isinstance(got.expr, FromMap) |
| 343 | assert got.expr.operand("columns") == ["a", "b"] |
| 344 | |
| 345 | # Check that we can always pass columns up front |
| 346 | if meta: |
| 347 | options["meta"] = options["meta"][["a", "b"]] |
| 348 | result = from_map(func, files, columns=["a", "b"], **options) |
| 349 | assert_eq(result, pdf[["a", "b"]], check_index=False) |
| 350 | if meta: |
| 351 | options["meta"] = options["meta"][["a"]] |
| 352 | result = from_map(func, files, columns="a", **options) |
| 353 | assert_eq(result, pdf[["a"]], check_index=False) |
| 354 | |
| 355 | # Check the case that func returns a Series |
nothing calls this directly
no test coverage detected
searching dependent graphs…