(ileft, ii, max_in_chunk, result_inchunks)
| 138 | |
| 139 | |
| 140 | def _smooth_chunks(ileft, ii, max_in_chunk, result_inchunks): |
| 141 | # The previous step squashed the whole dimension into a single |
| 142 | # chunk for ileft + 1 (and potentially combined too many elements |
| 143 | # into a single chunk for ileft as well). We split up the single |
| 144 | # chunk into multiple chunks to match the max_in_chunk to keep |
| 145 | # chunksizes consistent: |
| 146 | # ((1, 1), (200)) -> ((1, 1), (20, ) * 10) for max_in_chunk = 20 |
| 147 | # It's important to ensure that all dimensions before the dimension |
| 148 | # we adjust have all-1 chunks to respect C contiguous arrays |
| 149 | # during the reshaping |
| 150 | # Example: |
| 151 | # Assume arr = da.from_array(np.arange(0, 12).reshape(4, 3), chunks=(2, 3)) |
| 152 | # Reshaping to arr.reshape(-1, ) will return |
| 153 | # [ 0 1 2 3 4 5 6 7 8 9 10 11] |
| 154 | # The first dimension of the reshaped axis are the chunks with length 2 |
| 155 | # Assume we split the second dimension into (2, 1), i.e. setting the chunks to |
| 156 | # ((2, 2), (2, 1)) and the output chunks to ((4, 2, 4, 2), ) |
| 157 | # In this case, the individual chunks do not hold a contiguous sequence. |
| 158 | # For example, the first chunk is [[0, 1], [3, 4]]. |
| 159 | # Then, the result will be different because we first reshape the individual, |
| 160 | # non-contiguous chunks before concatenating them: |
| 161 | # [ 0 1 3 4 2 5 6 7 9 10 8 11] |
| 162 | # This is equivalent to |
| 163 | # arr = np.arange(0, 12).reshape(4, 3) |
| 164 | # np.concatenate(list(map(lambda x: x.reshape(-1), [arr[:2, :2], arr[:2, 2:], arr[2:, :2], arr[2:, 2:]]))) |
| 165 | |
| 166 | ileft_orig = ileft |
| 167 | max_result_in_chunk = _cal_max_chunk_size(result_inchunks, ileft, ii) |
| 168 | if max_in_chunk == max_result_in_chunk: |
| 169 | # reshaping doesn't mess up |
| 170 | return result_inchunks |
| 171 | |
| 172 | while all(x == 1 for x in result_inchunks[ileft]): |
| 173 | # Find the first dimension where we can split chunks |
| 174 | ileft += 1 |
| 175 | |
| 176 | if ileft < ii + 1: |
| 177 | factor = math.ceil(max_result_in_chunk / max_in_chunk) |
| 178 | result_in_chunk = result_inchunks[ileft] |
| 179 | |
| 180 | if len(result_in_chunk) == 1: |
| 181 | # This is the trivial case: when we arrive here, the chunk we are |
| 182 | # splitting has the same length as the whole dimension, and all previous |
| 183 | # dimensions that are reshaped into the same dimension have all-1 chunks. |
| 184 | # So we can safely split this dimension. |
| 185 | elem = result_in_chunk[0] |
| 186 | factor = min(factor, elem) |
| 187 | ceil_elem = math.ceil(elem / factor) |
| 188 | new_inchunk = [ceil_elem] * factor |
| 189 | for i in range(ceil_elem * factor - elem): |
| 190 | new_inchunk[i] -= 1 |
| 191 | result_inchunks[ileft] = tuple(new_inchunk) |
| 192 | |
| 193 | if all(x == 1 for x in new_inchunk) and ileft < ii: |
| 194 | # might have to do another round |
| 195 | return _smooth_chunks(ileft_orig, ii, max_in_chunk, result_inchunks) |
| 196 | else: |
| 197 | # We are now in the more complicated case. The first dimension in the set |
searching dependent graphs…