Construct a HighLevelGraph from a new layer and a set of collections. This constructs a HighLevelGraph in the common case where we have a single new layer and a set of old collections on which we want to depend. This pulls out the ``__dask_layers__()`` method of the collections if they exist, and adds them to the dependencies for this new layer.
(
cls,
name: str,
layer: Graph,
dependencies: Sequence[DaskCollection] = (),
)
| 468 | |
| 469 | @classmethod |
| 470 | def from_collections( |
| 471 | cls, |
| 472 | name: str, |
| 473 | layer: Graph, |
| 474 | dependencies: Sequence[DaskCollection] = (), |
| 475 | ) -> HighLevelGraph: |
| 476 | """Construct a HighLevelGraph from a new layer and a set of collections |
| 477 | |
| 478 | This constructs a HighLevelGraph in the common case where we have a single |
| 479 | new layer and a set of old collections on which we want to depend. |
| 480 | |
| 481 | This pulls out the ``__dask_layers__()`` method of the collections if |
| 482 | they exist, and adds them to the dependencies for this new layer. It |
| 483 | also merges all of the layers from all of the dependent collections |
| 484 | together into the new layers for this graph. |
| 485 | |
| 486 | Parameters |
| 487 | ---------- |
| 488 | name : str |
| 489 | The name of the new layer |
| 490 | layer : Mapping |
| 491 | The graph layer itself |
| 492 | dependencies : List of Dask collections |
| 493 | A list of other dask collections (like arrays or dataframes) that |
| 494 | have graphs themselves |
| 495 | |
| 496 | Examples |
| 497 | -------- |
| 498 | |
| 499 | In typical usage we make a new task layer, and then pass that layer |
| 500 | along with all dependent collections to this method. |
| 501 | |
| 502 | >>> def add(self, other): |
| 503 | ... name = 'add-' + tokenize(self, other) |
| 504 | ... layer = {(name, i): (add, input_key, other) |
| 505 | ... for i, input_key in enumerate(self.__dask_keys__())} |
| 506 | ... graph = HighLevelGraph.from_collections(name, layer, dependencies=[self]) |
| 507 | ... return new_collection(name, graph) |
| 508 | """ |
| 509 | if len(dependencies) == 1: |
| 510 | return cls._from_collection(name, layer, dependencies[0]) |
| 511 | layers = {name: layer} |
| 512 | name_dep: set[str] = set() |
| 513 | deps: dict[str, set[str]] = {name: name_dep} |
| 514 | for collection in toolz.unique(dependencies, key=id): |
| 515 | if is_dask_collection(collection): |
| 516 | graph = collection.__dask_graph__() |
| 517 | if isinstance(graph, HighLevelGraph): |
| 518 | layers.update(graph.layers) |
| 519 | deps.update(graph.dependencies) |
| 520 | name_dep |= set(collection.__dask_layers__()) |
| 521 | else: |
| 522 | key = _get_some_layer_name(collection) |
| 523 | layers[key] = graph |
| 524 | name_dep.add(key) |
| 525 | deps[key] = set() |
| 526 | else: |
| 527 | raise TypeError(type(collection)) |