MCPcopy
hub / github.com/ray-project/ray / test_global_tabular_sum

Function test_global_tabular_sum

python/ray/data/tests/test_random_e2e.py:32–74  ·  view source on GitHub ↗
(
    ray_start_regular_shared_2_cpus,
    ds_format,
    num_parts,
    configure_shuffle_method,
    disable_fallback_to_object_extension,
)

Source from the content-addressed store, hash-verified

@pytest.mark.parametrize("num_parts", [1, 30])
@pytest.mark.parametrize("ds_format", ["arrow", "pandas"])
def test_global_tabular_sum(
    ray_start_regular_shared_2_cpus,
    ds_format,
    num_parts,
    configure_shuffle_method,
    disable_fallback_to_object_extension,
):
    """Check the global ``sum`` aggregation on single-column tabular datasets.

    Scenarios covered, each for every ``ds_format`` / ``num_parts`` combination:

    * a shuffled column holding 0..99 sums to the total of those values;
    * an empty (fully filtered-out) dataset sums to ``None``;
    * a column with one null: the null is skipped by default, and the result
      is null when ``ignore_nulls=False``;
    * an all-null column: ``None`` by default, null when
      ``ignore_nulls=False``.

    Args:
        ray_start_regular_shared_2_cpus: pytest fixture defined outside this
            block (presumably provides the Ray cluster -- confirm in conftest).
        ds_format: ``"arrow"`` or ``"pandas"``; for ``"pandas"`` each dataset
            is additionally passed through ``_to_pandas``.
        num_parts: number of partitions passed to ``repartition``.
        configure_shuffle_method: pytest fixture defined outside this block.
        disable_fallback_to_object_extension: pytest fixture defined outside
            this block.
    """
    # The seed is time-based, so it is printed to make a failing shuffle
    # order reproducible. The message must name *this* test.
    seed = int(time.time())
    print(f"Seeding RNG for test_global_tabular_sum with: {seed}")
    random.seed(seed)
    xs = list(range(100))
    random.shuffle(xs)
    # Shuffling does not change the total; derive it instead of hard-coding.
    expected_sum = sum(xs)

    def _to_pandas(ds):
        # Identity map_batches that requests the "pandas" batch format.
        return ds.map_batches(lambda x: x, batch_size=None, batch_format="pandas")

    # Test built-in global sum aggregation
    ds = ray.data.from_items([{"A": x} for x in xs]).repartition(num_parts)
    if ds_format == "pandas":
        ds = _to_pandas(ds)
    assert ds.sum("A") == expected_sum

    # Test empty dataset: range(10) yields ids 0..9, so "> 10" keeps no rows.
    ds = ray.data.range(10)
    if ds_format == "pandas":
        ds = _to_pandas(ds)
    assert ds.filter(lambda r: r["id"] > 10).sum("id") is None

    # Test built-in global sum aggregation with a null value in the column
    nan_ds = ray.data.from_items([{"A": x} for x in xs] + [{"A": None}]).repartition(
        num_parts
    )
    if ds_format == "pandas":
        nan_ds = _to_pandas(nan_ds)
    assert nan_ds.sum("A") == expected_sum
    # Test ignore_nulls=False
    assert pd.isnull(nan_ds.sum("A", ignore_nulls=False))

    # Test a column made up entirely of nulls
    nan_ds = ray.data.from_items([{"A": None}] * len(xs)).repartition(num_parts)
    if ds_format == "pandas":
        nan_ds = _to_pandas(nan_ds)
    assert nan_ds.sum("A") is None
    assert pd.isnull(nan_ds.sum("A", ignore_nulls=False))
75
76
77def test_random_block_order_schema(

Callers

nothing calls this directly

Calls 10

printFunction · 0.85
listFunction · 0.85
rangeFunction · 0.70
_to_pandasFunction · 0.70
timeMethod · 0.45
seedMethod · 0.45
shuffleMethod · 0.45
repartitionMethod · 0.45
sumMethod · 0.45
filterMethod · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…