Write dask array to a stack of .npy files. This partitions the dask.array along one axis and stores each block along that axis as a single .npy file in the specified directory. Examples: >>> x = da.ones((5, 10, 10), chunks=(2, 4, 4)) # doctest: +SKIP >>> da.to_npy_st
(dirname, x, axis=0)
| 6071 | |
| 6072 | |
def to_npy_stack(dirname, x, axis=0):
    """Write dask array to a stack of .npy files

    This partitions the dask.array along one axis and stores each block along
    that axis as a single .npy file in the specified directory

    Parameters
    ----------
    dirname : str
        Directory to write into. It is created, together with any missing
        parent directories, if it does not exist yet.
    x : dask.array.Array
        The array to store.
    axis : int, optional
        Axis along which the array is split into separate files. Defaults to
        0. A negative value counts back from the last axis.

    Returns
    -------
    None
        The files are written as a side effect.

    Examples
    --------
    >>> x = da.ones((5, 10, 10), chunks=(2, 4, 4))  # doctest: +SKIP
    >>> da.to_npy_stack('data/', x, axis=0)  # doctest: +SKIP

    The ``.npy`` files store numpy arrays for ``x[0:2], x[2:4], and x[4:5]``
    respectively, as is specified by the chunk size along the zeroth axis::

        $ tree data/
        data/
        |-- 0.npy
        |-- 1.npy
        |-- 2.npy
        |-- info

    The ``info`` file stores the dtype, chunks, and axis information of the array.
    You can load these stacks with the :func:`dask.array.from_npy_stack` function.

    >>> y = da.from_npy_stack('data/')  # doctest: +SKIP

    See Also
    --------
    from_npy_stack
    """
    # Resolve a negative axis before it is compared with enumerate() indices
    # below: a negative value would never match, which silently merged every
    # axis into one chunk and produced a single file.
    ndim = len(x.chunks)
    if -ndim <= axis < 0:
        axis += ndim

    # Keep the chunking along ``axis``; merge every other axis into one chunk
    # so that each block along ``axis`` is a complete slab of the array.
    chunks = tuple((c if i == axis else (sum(c),)) for i, c in enumerate(x.chunks))
    xx = x.rechunk(chunks)

    # exist_ok avoids the check-then-create race and also creates missing
    # parent directories.
    os.makedirs(dirname, exist_ok=True)

    meta = {"chunks": chunks, "dtype": x.dtype, "axis": axis}

    with open(os.path.join(dirname, "info"), "wb") as f:
        pickle.dump(meta, f)

    # One np.save task per block, numbered in flattened key order.
    name = f"to-npy-stack-{uuid.uuid1()}"
    dsk = {
        (name, i): (np.save, os.path.join(dirname, f"{i}.npy"), key)
        for i, key in enumerate(core.flatten(xx.__dask_keys__()))
    }

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[xx])
    compute_as_if_collection(Array, graph, list(dsk))
| 6123 | |
| 6124 | |
| 6125 | def from_npy_stack(dirname, mmap_mode="r"): |
nothing calls this directly
no test coverage detected
searching dependent graphs…