Return new HighLevelGraph with only the tasks required to calculate keys. In other words, remove unnecessary tasks from dask. Parameters ---------- keys iterable of keys or nested list of keys such as the output of ``__dask_keys__()``
(self, keys: Iterable[Key])
| 727 | return ret |
| 728 | |
| 729 | def cull(self, keys: Iterable[Key]) -> HighLevelGraph: |
| 730 | """Return new HighLevelGraph with only the tasks required to calculate keys. |
| 731 | |
| 732 | In other words, remove unnecessary tasks from dask. |
| 733 | |
| 734 | Parameters |
| 735 | ---------- |
| 736 | keys |
| 737 | iterable of keys or nested list of keys such as the output of |
| 738 | ``__dask_keys__()`` |
| 739 | |
| 740 | Returns |
| 741 | ------- |
| 742 | hlg: HighLevelGraph |
| 743 | Culled high level graph |
| 744 | """ |
| 745 | keys_set = set(flatten(keys)) |
| 746 | |
| 747 | # Note: All Layer classes still in existence are of |
| 748 | # one of these types (or subclasses) |
| 749 | # |
| 750 | # - MaterializedLayer |
| 751 | # - Blockwise |
| 752 | # - ArrayOverlapLayer (which is basically as good as MaterializedLayer) |
| 753 | if not any(layer.has_legacy_tasks for layer in self.layers.values()): |
| 754 | all_ext_keys = set() |
| 755 | else: |
| 756 | # FIXME: Technically, we don't need to compute **all** keys but only |
| 757 | # those of the current layer and all of its dependencies, i.e. if |
| 758 | # there are legacy layers for IO followed by many blockwise layers, |
| 759 | # we should still get by without this |
| 760 | all_ext_keys = self.get_all_external_keys() |
| 761 | |
| 762 | ret_layers: dict = {} |
| 763 | layer_dependencies = {} |
| 764 | tok = tokenize(keys_set) |
| 765 | for layer_name in reversed(self._toposort_layers()): |
| 766 | new_layer_name = f"{layer_name}-{tok}" |
| 767 | layer = self.layers[layer_name] |
| 768 | if keys_set: |
| 769 | culled_layer, culled_deps = layer.cull(keys_set, all_ext_keys) |
| 770 | if not culled_deps: |
| 771 | continue |
| 772 | |
| 773 | # Update `keys_set` with all of the layer's external key dependencies, |
| 774 | # which are all the layer's dependencies (`culled_deps`) |
| 775 | # excluding the layer's output keys. |
| 776 | for k, d in culled_deps.items(): |
| 777 | keys_set |= d |
| 778 | keys_set.discard(k) |
| 779 | layer = culled_layer |
| 780 | # Save the culled layer and its key dependencies |
| 781 | ret_layers[new_layer_name] = layer |
| 782 | layer_dependencies[new_layer_name] = self.dependencies[layer_name] |
| 783 | |
| 784 | # Converting dict_keys to a real set lets Python optimise the set |
| 785 | # intersection to iterate over the smaller of the two sets. |
| 786 | ret_layers_keys = set(ret_layers.keys()) |