Convert numpy/dask arrays from characters to fixed width bytes.
(arr)
| 204 | |
| 205 | |
def char_to_bytes(arr):
    """Collapse the trailing axis of an ``S1`` character array into bytes.

    Parameters
    ----------
    arr : array-like
        numpy or chunked (e.g. dask) array whose dtype compares equal to
        ``"S1"``.

    Returns
    -------
    array-like
        * ``arr`` itself when it has no dimensions.
        * A numpy array of ``np.bytes_`` zeros with shape ``arr.shape[:-1]``
          when the last axis has length zero.
        * The result of the chunk manager's ``map_blocks`` call (dtype
          ``S<width>``, last axis dropped) when ``arr`` is a chunked array.
        * ``StackedBytesArray(arr)`` in every other case.

    Raises
    ------
    ValueError
        If ``arr.dtype`` is not ``"S1"``, or if a chunked ``arr`` has more
        than one chunk along its last dimension.
    """
    if arr.dtype != "S1":
        raise ValueError("argument must have dtype='S1'")

    # A zero-dimensional input has no axis to concatenate along.
    if arr.ndim == 0:
        return arr

    width = arr.shape[-1]
    if width == 0:
        # An S0 dtype cannot be created, so hand back empty byte strings.
        return np.zeros(arr.shape[:-1], dtype=np.bytes_)

    if not is_chunked_array(arr):
        return StackedBytesArray(arr)

    manager = get_chunked_array_type(arr)

    # Each block must hold the complete last axis so that it can be joined
    # into a single fixed-width value per element.
    last_axis_chunks = arr.chunks[-1]
    if len(last_axis_chunks) > 1:
        raise ValueError(
            "cannot stacked dask character array with "
            f"multiple chunks in the last dimension: {arr}"
        )

    return manager.map_blocks(
        _numpy_char_to_bytes,
        arr,
        dtype=np.dtype(f"S{width}"),
        chunks=arr.chunks[:-1],
        drop_axis=[arr.ndim - 1],
    )
| 240 | |
| 241 | |
| 242 | def _numpy_char_to_bytes(arr): |
no test coverage detected
searching dependent graphs…