(
shutdown_only, configure_shuffle_method, disable_fallback_to_object_extension
)
| 111 | |
| 112 | |
def test_random_shuffle(
    shutdown_only, configure_shuffle_method, disable_fallback_to_object_extension
):
    """Check ``random_shuffle`` ordering for unseeded, seeded and empty datasets.

    The three parameters are presumably pytest fixtures (defined outside this
    block); the body never references them directly.
    """
    # Row limit passed to every ``take``; larger than the 100-row ranges
    # requested below.
    limit = 999

    # Two separately built, unseeded shuffle pipelines should disagree on order.
    first = ray.data.range(100).random_shuffle().take(limit)
    second = ray.data.range(100).random_shuffle().take(limit)
    assert first != second, (first, second)

    # A single unseeded shuffle pipeline should also yield a different order
    # each time it is consumed.
    shuffled = ray.data.range(100).random_shuffle()
    first = shuffled.take(limit)
    second = shuffled.take(limit)
    assert first != second, (first, second)

    # Same comparison with the input forced into one block.
    first = ray.data.range(100, override_num_blocks=1).random_shuffle().take(limit)
    second = ray.data.range(100, override_num_blocks=1).random_shuffle().take(limit)
    assert first != second, (first, second)

    # Repartitioning the shuffled data to one block: the logical plan reports
    # a single initial block, and two such pipelines still differ in order.
    single_block = ray.data.range(100).random_shuffle().repartition(1)
    assert single_block._logical_plan.initial_num_blocks() == 1
    first = ray.data.range(100).random_shuffle().repartition(1).take(limit)
    second = ray.data.range(100).random_shuffle().repartition(1).take(limit)
    assert first != second, (first, second)

    def take_seeded(seed):
        # Build a fresh five-block pipeline shuffled with ``seed`` and read it.
        source = ray.data.range(100, override_num_blocks=5)
        return source.random_shuffle(seed=seed).take(limit)

    # Equal seeds reproduce the same order; that order differs from the
    # unshuffled rows and from the order produced by another seed.
    unshuffled = ray.data.range(100, override_num_blocks=5).take(limit)
    seed0_a = take_seeded(0)
    seed0_b = take_seeded(0)
    other_seed = take_seeded(12345)
    assert seed0_a == seed0_b, (seed0_a, seed0_b)
    assert seed0_a != unshuffled, (seed0_a, unshuffled)
    assert seed0_a != other_seed, (seed0_a, other_seed)

    # The seeded check is run a second time, without the alternate seed.
    unshuffled = ray.data.range(100, override_num_blocks=5).take(limit)
    seed0_a = take_seeded(0)
    seed0_b = take_seeded(0)
    assert seed0_a == seed0_b, (seed0_a, seed0_b)
    assert seed0_a != unshuffled, (seed0_a, unshuffled)

    # Test move.
    base = ray.data.range(100, override_num_blocks=2)
    first = base.random_shuffle().take(limit)
    # The mapped rows are read and then discarded; only the call matters here.
    _ = base.map(lambda row: row).take(limit)
    second = ray.data.range(100).random_shuffle().take(limit)
    assert first != second, (first, second)

    # Test empty dataset.
    empty = ray.data.from_items([])
    shuffled_empty = empty.random_shuffle()
    assert shuffled_empty.count() == 0
    assert shuffled_empty.take() == empty.take()
| 169 | |
| 170 |
nothing calls this directly
no test coverage detected
searching dependent graphs…