MCPcopy
hub / github.com/dask/dask / test_from_map

Function test_from_map

dask/dataframe/dask_expr/io/tests/test_io.py:298–359  ·  view source on GitHub ↗
(tmpdir, meta, label, allow_projection, enforce_metadata)

Source from the content-addressed store, hash-verified

296@pytest.mark.parametrize("allow_projection", [True, False])
297@pytest.mark.parametrize("enforce_metadata", [True, False])
298def test_from_map(tmpdir, meta, label, allow_projection, enforce_metadata):
299 pdf = pd.DataFrame({c: range(10) for c in "abcdefghijklmn"})
300 dd.from_pandas(pdf, 3).to_parquet(tmpdir, write_index=False)
301 files = sorted(glob.glob(f"{tmpdir}/*.parquet"))
302 if allow_projection:
303 func = pd.read_parquet
304 else:
305 func = lambda *args, **kwargs: pd.read_parquet(*args, **kwargs)
306 options = {
307 "enforce_metadata": enforce_metadata,
308 "label": label,
309 }
310 if meta:
311 options["meta"] = pdf.iloc[:0]
312
313 df = from_map(func, files, **options)
314 assert_eq(df, pdf, check_index=False)
315 assert_eq(df["a"], pdf["a"], check_index=False)
316 assert_eq(df[["a"]], pdf[["a"]], check_index=False)
317 assert_eq(df[["a", "b"]], pdf[["a", "b"]], check_index=False)
318 assert all(
319 tsk.data_producer
320 for key, tsk in df.__dask_graph__().items()
321 if not key[0].startswith("_to_string")
322 )
323 if allow_projection:
324 assert all(
325 tsk.data_producer
326 for key, tsk in df[["a"]].optimize(fuse=False).__dask_graph__().items()
327 if not key[0].startswith("_to_string")
328 )
329
330 if label:
331 if pyarrow_strings_enabled():
332 assert df.expr.frame._name.startswith(label)
333 else:
334 assert df.expr._name.startswith(label)
335
336 if allow_projection:
337 got = df[["a", "b"]].optimize(fuse=False)
338 if pyarrow_strings_enabled():
339 assert isinstance(got.expr.frame, FromMap)
340 assert got.expr.frame.operand("columns") == ["a", "b"]
341 else:
342 assert isinstance(got.expr, FromMap)
343 assert got.expr.operand("columns") == ["a", "b"]
344
345 # Check that we can always pass columns up front
346 if meta:
347 options["meta"] = options["meta"][["a", "b"]]
348 result = from_map(func, files, columns=["a", "b"], **options)
349 assert_eq(result, pdf[["a", "b"]], check_index=False)
350 if meta:
351 options["meta"] = options["meta"][["a"]]
352 result = from_map(func, files, columns="a", **options)
353 assert_eq(result, pdf[["a"]], check_index=False)
354
355 # Check the case that func returns a Series

Callers

nothing calls this directly

Calls 10

from_map — Function · 0.90
assert_eq — Function · 0.90
pyarrow_strings_enabled — Function · 0.90
all — Function · 0.85
to_parquet — Method · 0.80
read_parquet — Method · 0.80
operand — Method · 0.80
items — Method · 0.45
__dask_graph__ — Method · 0.45
optimize — Method · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…