Optimize slices. 1. Fuse repeated slices, like x[5:][2:6] -> x[7:11]. This is generally not very important since we are fusing those tasks anyway. There is one specific exception to how xarray implements opening netcdf files and subsequent slices. Not merging them together can cause reading the whole netcdf file before we drop the unnecessary data. Fusing slices avoids that pattern.
(dsk)
| 92 | |
| 93 | |
| 94 | def _optimize_slices(dsk): |
| 95 | """Optimize slices |
| 96 | 1. Fuse repeated slices, like x[5:][2:6] -> x[7:11] |
| 97 | |
| 98 | This is generally not very important since we are fusing those tasks anyway. There |
| 99 | is one specific exception to how xarray implements opening netcdf files and subsequent |
| 100 | slices. Not merging them together can cause reading the whole netcdf file before |
| 101 | we drop the unnecessary data. Fusing slices avoids that pattern. |
| 102 | |
| 103 | See https://github.com/pydata/xarray/issues/9926 |
| 104 | """ |
| 105 | fancy_ind_types = (list, np.ndarray) |
| 106 | dsk = dsk.copy() |
| 107 | for _, v in dsk.items(): |
| 108 | if not (isinstance(v, Task) and v.func is _execute_subgraph): |
| 109 | continue |
| 110 | |
| 111 | inner_graph: dict = v.args[0] # type: ignore[annotation-unchecked] |
| 112 | |
| 113 | seen = set() |
| 114 | |
| 115 | for inner_key, inner_value in inner_graph.items(): |
| 116 | if inner_key in seen: |
| 117 | continue |
| 118 | else: |
| 119 | seen.add(inner_key) |
| 120 | |
| 121 | if a_task := _is_getter_task(inner_value): |
| 122 | temp_key = inner_key |
| 123 | get, a, a_index, a_asarray, a_lock = a_task |
| 124 | fused = False |
| 125 | |
| 126 | while a.key in inner_graph and ( |
| 127 | b_task := _is_getter_task(inner_graph[a.key]) |
| 128 | ): |
| 129 | seen.add(a.key) |
| 130 | f2, b, b_index, b_asarray, b_lock = b_task |
| 131 | |
| 132 | if isinstance(a_index, DataNode): |
| 133 | a_index = a_index.value |
| 134 | if isinstance(b_index, DataNode): |
| 135 | b_index = b_index.value |
| 136 | |
| 137 | if a_lock and a_lock is not b_lock: |
| 138 | break |
| 139 | if (type(a_index) is tuple) != (type(b_index) is tuple): |
| 140 | break |
| 141 | if type(a_index) is tuple: |
| 142 | indices = b_index + a_index |
| 143 | if len(a_index) != len(b_index) and any( |
| 144 | i is None for i in indices |
| 145 | ): |
| 146 | break |
| 147 | if f2 is getter_nofancy and not any( |
| 148 | isinstance(i, fancy_ind_types) for i in indices |
| 149 | ): |
| 150 | break |
| 151 | elif f2 is getter_nofancy and ( |
no test coverage detected
searching dependent graphs…