(ray_start_regular_shared, pandas)
| 7 | |
@pytest.mark.parametrize("pandas", [False, True])
def test_basic(ray_start_regular_shared, pandas):
    """Check single-key and batched lookups on a random access dataset.

    A 100-row range dataset (10 blocks) is extended with a ``key`` column
    (``id * 2``) and an ``embedding`` column (``id ** 2``). When ``pandas``
    is false, each batch is additionally converted to a ``pyarrow.Table``
    before the random access dataset is built on ``key`` with one worker.

    Keys that were never inserted (negative, odd, or past the end) must
    resolve to ``None``; every even key in ``[0, 198]`` must resolve to its
    full row.
    """
    dataset = ray.data.range(100, override_num_blocks=10)
    dataset = dataset.add_column("key", lambda batch: batch["id"] * 2)
    dataset = dataset.add_column("embedding", lambda batch: batch["id"] ** 2)
    if not pandas:
        # Re-emit each pandas batch as a pyarrow Table.
        dataset = dataset.map_batches(
            lambda frame: pyarrow.Table.from_pandas(frame),
            batch_format="pandas",
        )

    rad = dataset.to_random_access_dataset("key", num_workers=1)

    def lookup(key):
        # Resolve one asynchronous point lookup to its value.
        return ray.get(rad.get_async(key))

    def expected_row(i):
        # Row that the dataset is expected to hold for source id ``i``.
        return {"id": i, "key": i * 2, "embedding": i**2}

    # Test get.
    assert lookup(-1) is None
    assert lookup(200) is None
    for i in range(100):
        even_key = i * 2
        # Odd keys are never produced by ``id * 2``; even ones always are.
        assert lookup(even_key + 1) is None
        assert lookup(even_key) == expected_row(i)

    # Test multiget.
    # Query order: one miss, ten hits (even keys), ten misses (odd keys),
    # one miss.
    query_keys = [-1, *range(0, 20, 2), *range(1, 21, 2), 200]
    results = rad.multiget(query_keys)
    hits = [expected_row(i) for i in range(10)]
    assert results == [None] + hits + [None] * 11
| 33 | |
| 34 | |
| 35 | def test_empty_blocks(ray_start_regular_shared): |
nothing calls this directly
no test coverage detected
searching dependent graphs…