Map a function from str -> str element-wise over ``self``. ``f`` will be applied exactly once to each non-missing unique value in ``self``. Missing values will always map to ``self.missing_value``.
(self, f)
| 650 | return results[self.as_int_array()] |
| 651 | |
| 652 | def map(self, f): |
| 653 | """ |
| 654 | Map a function from str -> str element-wise over ``self``. |
| 655 | |
| 656 | ``f`` will be applied exactly once to each non-missing unique value in |
| 657 | ``self``. Missing values will always map to ``self.missing_value``. |
| 658 | """ |
| 659 | # f() should only return None if None is our missing value. |
| 660 | if self.missing_value is None: |
| 661 | allowed_outtypes = self.SUPPORTED_SCALAR_TYPES |
| 662 | else: |
| 663 | allowed_outtypes = self.SUPPORTED_NON_NONE_SCALAR_TYPES |
| 664 | |
| 665 | def f_to_use(x, |
| 666 | missing_value=self.missing_value, |
| 667 | otypes=allowed_outtypes): |
| 668 | |
| 669 | # Don't call f on the missing value; those locations don't exist |
| 670 | # semantically. We return _sortable_sentinel rather than None |
| 671 | # because the np.unique call below sorts the categories array, |
| 672 | # which raises an error on Python 3 because None and str aren't |
| 673 | # comparable. |
| 674 | if x == missing_value: |
| 675 | return _sortable_sentinel |
| 676 | |
| 677 | ret = f(x) |
| 678 | |
| 679 | if not isinstance(ret, otypes): |
| 680 | raise TypeError( |
| 681 | "LabelArray.map expected function {f} to return a string" |
| 682 | " or None, but got {type} instead.\n" |
| 683 | "Value was {value}.".format( |
| 684 | f=f.__name__, |
| 685 | type=type(ret).__name__, |
| 686 | value=ret, |
| 687 | ) |
| 688 | ) |
| 689 | |
| 690 | if ret == missing_value: |
| 691 | return _sortable_sentinel |
| 692 | |
| 693 | return ret |
| 694 | |
| 695 | new_categories_with_duplicates = ( |
| 696 | np.vectorize(f_to_use, otypes=[object])(self.categories) |
| 697 | ) |
| 698 | |
| 699 | # If f() maps multiple inputs to the same output, then we can end up |
| 700 | # with the same code duplicated multiple times. Compress the categories |
| 701 | # by running them through np.unique, and then use the reverse lookup |
| 702 | # table to compress codes as well. |
| 703 | new_categories, bloated_inverse_index = np.unique( |
| 704 | new_categories_with_duplicates, |
| 705 | return_inverse=True |
| 706 | ) |
| 707 | |
| 708 | if new_categories[0] is _sortable_sentinel: |
| 709 | # f_to_use returns _sortable_sentinel for locations that should be |