hub / github.com/ray-project/ray / test_zip_multiple_datasets

Function test_zip_multiple_datasets

python/ray/data/tests/test_zip.py:15–36 · view source on GitHub ↗

(ray_start_regular_shared, num_datasets)

Source from the content-addressed store, hash-verified

13
@pytest.mark.parametrize("num_datasets", [2, 3, 4, 5, 10])
def test_zip_multiple_datasets(ray_start_regular_shared, num_datasets):
    """Zip ``num_datasets`` five-row range datasets and check schema and rows.

    The first dataset is a plain ``ray.data.range(5)``; the k-th additional
    dataset (k >= 1) has k added to its "id" column. The test expects the
    zipped result to expose the columns "id", "id_1", ..., "id_{n-1}" and row
    ``r`` to hold ``r + k`` in the k-th column.
    """
    # The untouched dataset that the others are zipped onto.
    base = ray.data.range(5, override_num_blocks=5)

    # Each further dataset shifts "id" by its own position. ``offset=shift``
    # binds the current value at lambda-creation time, so every UDF keeps its
    # own shift instead of sharing the final loop value.
    shifted = [
        ray.data.range(5, override_num_blocks=5).map(
            column_udf("id", lambda x, offset=shift: x + offset)
        )
        for shift in range(1, num_datasets)
    ]

    zipped = base.zip(*shifted)

    # Verify schema names
    expected_names = ["id", *(f"id_{k}" for k in range(1, num_datasets))]
    assert zipped.schema().names == expected_names

    # Verify data: one tuple per row, one entry per zipped dataset.
    expected_rows = [
        tuple(row + shift for shift in range(num_datasets)) for row in range(5)
    ]
    assert zipped.take() == named_values(expected_names, expected_rows)
37
38
39	@pytest.mark.parametrize(

nothing calls this directly

column_udfFunction · 0.90

named_valuesFunction · 0.90

rangeFunction · 0.70

mapMethod · 0.45

appendMethod · 0.45

zipMethod · 0.45

schemaMethod · 0.45

takeMethod · 0.45

no test coverage detected

searching dependent graphs…