Factorizes the user_items matrix. Parameters ---------- user_items: csr_matrix Matrix of confidences for the liked items. This matrix should be a csr_matrix where the rows of the matrix are the users, and the columns are the items liked by each user.
(self, user_items, show_progress=True, callback=None)
| 70 | self.random_state = random_state |
| 71 | |
| 72 | def fit(self, user_items, show_progress=True, callback=None): |
| 73 | """Factorizes the user_items matrix |
| 74 | |
| 75 | Parameters |
| 76 | ---------- |
| 77 | user_items: csr_matrix |
| 78 | Matrix of confidences for the liked items. This matrix should be a csr_matrix where |
| 79 | the rows of the matrix are the user, and the columns are the items liked by that user. |
| 80 | BPR ignores the weight value of the matrix right now - it treats non zero entries |
| 81 | as a binary signal that the user liked the item. |
| 82 | show_progress : bool, optional |
| 83 | Whether to show a progress bar |
| 84 | callback: Callable, optional |
| 85 | Callable function on each epoch with such arguments as epoch, elapsed time and progress |
| 86 | """ |
| 87 | rs = check_random_state(self.random_state) |
| 88 | user_items = check_csr(user_items) |
| 89 | |
| 90 | # for now, all we handle is float 32 values |
| 91 | if user_items.dtype != np.float32: |
| 92 | user_items = user_items.astype(np.float32) |
| 93 | |
| 94 | users, items = user_items.shape |
| 95 | |
| 96 | # We need efficient user lookup for case of removing own likes |
| 97 | if self.verify_negative_samples and not user_items.has_sorted_indices: |
| 98 | user_items.sort_indices() |
| 99 | |
| 100 | # this basically calculates the 'row' attribute of a COO matrix |
| 101 | # without requiring us to get the whole COO matrix |
| 102 | user_counts = np.ediff1d(user_items.indptr) |
| 103 | userids = np.repeat(np.arange(users), user_counts).astype(user_items.indices.dtype) |
| 104 | |
| 105 | # create factors if not already created. |
| 106 | # Note: the final dimension is for the item bias term - which is set to a 1 for all users |
| 107 | # this simplifies interfacing with approximate nearest neighbours libraries etc |
| 108 | if self.item_factors is None: |
| 109 | item_factors = rs.random((items, self.factors + 1), "float32") - 0.5 |
| 110 | item_factors /= self.factors |
| 111 | |
| 112 | # set factors to all zeros for items without any ratings |
| 113 | item_counts = np.bincount(user_items.indices, minlength=items) |
| 114 | item_factors[item_counts == 0] = np.zeros(self.factors + 1) |
| 115 | self.item_factors = implicit.gpu.Matrix(item_factors) |
| 116 | |
| 117 | if self.user_factors is None: |
| 118 | user_factors = rs.random((users, self.factors + 1), "float32") - 0.5 |
| 119 | user_factors /= self.factors |
| 120 | |
| 121 | # set factors to all zeros for users without any ratings |
| 122 | user_factors[user_counts == 0] = np.zeros(self.factors + 1) |
| 123 | user_factors[:, self.factors] = 1.0 |
| 124 | |
| 125 | self.user_factors = implicit.gpu.Matrix(user_factors) |
| 126 | |
| 127 | self._item_norms = self._user_norms = None |
| 128 | |
| 129 | userids = implicit.gpu.IntVector(userids) |