Extract the indices of the k largest elements from a on the given axis, and return them sorted from largest to smallest. If k is negative, extract the indices of the -k smallest elements instead, and return them sorted from smallest to largest. This performs best when ``k`` is much smaller than the chunk size. All results will be returned in a single chunk along the given axis.
(a, k, axis=-1, split_every=None)
| 1404 | |
| 1405 | |
| 1406 | def argtopk(a, k, axis=-1, split_every=None): |
| 1407 | """Extract the indices of the k largest elements from a on the given axis, |
| 1408 | and return them sorted from largest to smallest. If k is negative, extract |
| 1409 | the indices of the -k smallest elements instead, and return them sorted |
| 1410 | from smallest to largest. |
| 1411 | |
| 1412 | This performs best when ``k`` is much smaller than the chunk size. All |
| 1413 | results will be returned in a single chunk along the given axis. |
| 1414 | |
| 1415 | Parameters |
| 1416 | ---------- |
| 1417 | x: Array |
| 1418 | Data being sorted |
| 1419 | k: int |
| 1420 | axis: int, optional |
| 1421 | split_every: int >=2, optional |
| 1422 | See :func:`topk`. The performance considerations for topk also apply |
| 1423 | here. |
| 1424 | |
| 1425 | Returns |
| 1426 | ------- |
| 1427 | Selection of np.intp indices of x with size abs(k) along the given axis. |
| 1428 | |
| 1429 | Examples |
| 1430 | -------- |
| 1431 | >>> import dask.array as da |
| 1432 | >>> x = np.array([5, 1, 3, 6]) |
| 1433 | >>> d = da.from_array(x, chunks=2) |
| 1434 | >>> d.argtopk(2).compute() |
| 1435 | array([3, 0]) |
| 1436 | >>> d.argtopk(-2).compute() |
| 1437 | array([1, 2]) |
| 1438 | """ |
| 1439 | axis = validate_axis(axis, a.ndim) |
| 1440 | |
| 1441 | # Generate nodes where every chunk is a tuple of (a, original index of a) |
| 1442 | idx = arange(a.shape[axis], chunks=(a.chunks[axis],), dtype=np.intp) |
| 1443 | idx = idx[tuple(slice(None) if i == axis else np.newaxis for i in range(a.ndim))] |
| 1444 | a_plus_idx = a.map_blocks(chunk.argtopk_preprocess, idx, dtype=object) |
| 1445 | |
| 1446 | # chunk and combine steps of the reduction. They acquire in input a tuple |
| 1447 | # of (a, original indices of a) and return another tuple containing the top |
| 1448 | # k elements of a and the matching original indices. The selection is not |
| 1449 | # sorted internally, as in np.argpartition. |
| 1450 | chunk_combine = partial(chunk.argtopk, k=k) |
| 1451 | # aggregate step of the reduction. Internally invokes the chunk/combine |
| 1452 | # function, then sorts the results internally, drops a and returns the |
| 1453 | # index only. |
| 1454 | aggregate = partial(chunk.argtopk_aggregate, k=k) |
| 1455 | |
| 1456 | if isinstance(axis, Number): |
| 1457 | naxis = 1 |
| 1458 | else: |
| 1459 | naxis = len(axis) |
| 1460 | |
| 1461 | meta = a._meta.astype(np.intp).reshape((0,) * (a.ndim - naxis + 1)) |
| 1462 | |
| 1463 | return reduction( |
no test coverage detected
searching dependent graphs…