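Implements the logic shared by `split` and `rsplit`: each string is split on `sep` and, when `dim` is given, the pieces are padded to a common length along a new dimension `dim`.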
(
self,
*,
func: Callable,
pre: bool,
dim: Hashable,
sep: str | bytes | Any | None,
maxsplit: int,
)
| 2482 | return self._partitioner(func=self._obj.dtype.type.rpartition, dim=dim, sep=sep) |
| 2483 | |
def _splitter(
    self,
    *,
    func: Callable,
    pre: bool,
    dim: Hashable,
    sep: str | bytes | Any | None,
    maxsplit: int,
) -> DataArray:
    """
    Implements logic for `split` and `rsplit`.

    Parameters
    ----------
    func : callable
        Splitting function, called as ``func(string, sep, maxsplit)``.
        Its result must support ``len`` and list concatenation.
    pre : bool
        Selects the side on which short results are padded with ``""``:
        if true the padding is appended after the pieces, otherwise it
        is placed before them.
    dim : hashable or None
        Name of the new dimension that holds the pieces. If None, no
        dimension is added and each element of the result holds whatever
        `func` returned for that string.
    sep : str, bytes, array-like or None
        Separator handed to `func`. Anything other than None is first
        passed through ``self._stringify``.
    maxsplit : int
        Split limit handed to `func`.

    Returns
    -------
    DataArray
        With ``dim=None``, the object-dtype result of applying `func`
        element-wise. Otherwise an array with an additional dimension
        `dim` whose length is the largest number of pieces produced for
        any element (at least 1), cast back to the kind of the input
        dtype. An empty input yields a copy with `dim` added at size 0.
    """
    if sep is not None:
        sep = self._stringify(sep)

    if dim is None:

        def _split_only(x, isep):
            return func(x, isep, maxsplit)

        return self._apply(func=_split_only, func_args=(sep,), dtype=np.object_)

    # _apply breaks on an empty array in this case
    if not self._obj.size:
        return self._obj.copy().expand_dims({dim: 0}, axis=-1)

    def _count_pieces(x, isep):
        # Count at least one piece so the new dimension is never empty.
        return max(len(func(x, isep, maxsplit)), 1)

    # First pass: the widest split result fixes the length of `dim`.
    n_pieces = (
        self._apply(func=_count_pieces, func_args=(sep,), dtype=np.int_)
        .max()
        .data.item()
    )
    out_dtype = self._obj.dtype

    def _dosplit(mystr, isep):
        # Split with the caller's `maxsplit`, exactly as in the counting
        # pass, so that len(res) <= n_pieces is guaranteed. Re-splitting
        # with a derived limit of ``n_pieces - 1`` would change results
        # for whitespace splitting (sep=None), where a limited split
        # keeps trailing whitespace in the last piece.
        res = func(mystr, isep, maxsplit)
        missing = n_pieces - len(res)
        if missing > 0:
            pad = [""] * missing
            # NOTE(review): pre=True puts the padding *after* the pieces;
            # confirm this matches what the split/rsplit callers pass.
            if pre:
                res += pad
            else:
                res = pad + res
        return np.array(res, dtype=out_dtype)

    # dtype MUST be object or strings can be truncated
    # See: https://github.com/numpy/numpy/issues/8352
    padded = self._apply(
        func=_dosplit,
        func_args=(sep,),
        dtype=np.object_,
        output_core_dims=[[dim]],
        # Every array returned by _dosplit has exactly n_pieces entries,
        # so that is the size to declare for the new dimension.
        output_sizes={dim: n_pieces},
    )
    # Cast the object-dtype intermediate back to the input's dtype kind.
    return duck_array_ops.astype(padded, self._obj.dtype.kind)
| 2535 | |
| 2536 | def split( |
| 2537 | self, |
no test coverage detected