(ar, return_index=False, return_inverse=False, return_counts=False)
| 1767 | |
| 1768 | @derived_from(np) |
| 1769 | def unique(ar, return_index=False, return_inverse=False, return_counts=False): |
| 1770 | # Test whether the downstream library supports structured arrays. If the |
| 1771 | # `np.empty_like` call raises a `TypeError`, the downstream library (e.g., |
| 1772 | # CuPy) doesn't support it. In that case we return the |
| 1773 | # `unique_no_structured_arr` implementation, otherwise (e.g., NumPy) just |
| 1774 | # continue as normal. |
| 1775 | try: |
| 1776 | meta = meta_from_array(ar) |
| 1777 | np.empty_like(meta, dtype=[("a", int), ("b", float)]) |
| 1778 | except TypeError: |
| 1779 | return unique_no_structured_arr( |
| 1780 | ar, |
| 1781 | return_index=return_index, |
| 1782 | return_inverse=return_inverse, |
| 1783 | return_counts=return_counts, |
| 1784 | ) |
| 1785 | |
| 1786 | orig_shape = ar.shape |
| 1787 | ar = ar.ravel() |
| 1788 | |
| 1789 | # Run unique on each chunk and collect results in a Dask Array of |
| 1790 | # unknown size. |
| 1791 | |
| 1792 | args = [ar, "i"] |
| 1793 | out_dtype = [("values", ar.dtype)] |
| 1794 | if return_index: |
| 1795 | args.extend([arange(ar.shape[0], dtype=np.intp, chunks=ar.chunks[0]), "i"]) |
| 1796 | out_dtype.append(("indices", np.intp)) |
| 1797 | else: |
| 1798 | args.extend([None, None]) |
| 1799 | if return_counts: |
| 1800 | args.extend([ones((ar.shape[0],), dtype=np.intp, chunks=ar.chunks[0]), "i"]) |
| 1801 | out_dtype.append(("counts", np.intp)) |
| 1802 | else: |
| 1803 | args.extend([None, None]) |
| 1804 | |
| 1805 | out = blockwise(_unique_internal, "i", *args, dtype=out_dtype, return_inverse=False) |
| 1806 | out._chunks = tuple((np.nan,) * len(c) for c in out.chunks) |
| 1807 | |
| 1808 | # Take the results from the unique chunks and do the following. |
| 1809 | # |
| 1810 | # 1. Collect all results as arguments. |
| 1811 | # 2. Concatenate each result into one big array. |
| 1812 | # 3. Pass all results as arguments to the internal unique again. |
| 1813 | # |
| 1814 | # TODO: This should be replaced with a tree reduction using this strategy. |
| 1815 | # xref: https://github.com/dask/dask/issues/2851 |
| 1816 | |
| 1817 | out_parts = [out["values"]] |
| 1818 | if return_index: |
| 1819 | out_parts.append(out["indices"]) |
| 1820 | else: |
| 1821 | out_parts.append(None) |
| 1822 | if return_counts: |
| 1823 | out_parts.append(out["counts"]) |
| 1824 | else: |
| 1825 | out_parts.append(None) |
| 1826 |
searching dependent graphs…