| 346 | return self._encode_sparse(examples) if sparse else self._encode_dense(examples) |
| 347 | |
def _encode_dense(self, examples):
    """
    Hash the features of each example into one row of a dense 2D array.

    Parameters
    ----------
    examples : sequence of mappings
        One mapping per example, from feature id to value. ``str`` ids are
        UTF-8 encoded; ``tuple``, ``dict`` and ``list`` ids are serialized
        with ``json.dumps(..., sort_keys=True)`` and then UTF-8 encoded. Any
        other id is passed to ``self.hash`` unchanged.

    Returns
    -------
    table : :py:class:`ndarray <numpy.ndarray>` of shape `(N, n_dim)`
        Row `i` accumulates the values of ``examples[i]``; each feature is
        added to column ``hash(feature id) % n_dim``, so features whose
        hashes collide share a column.
    """
    n_dim = self.n_dim

    # np.zeros takes the shape as ONE tuple argument; its second positional
    # parameter is the dtype, so the dimensions must not be passed separately.
    table = np.zeros((len(examples), n_dim))  # dense

    for row, feat_dict in enumerate(examples):
        for f_id, val in feat_dict.items():
            if isinstance(f_id, str):
                f_id = f_id.encode("utf-8")
            elif isinstance(f_id, (tuple, dict, list)):
                # use json module to convert the feature id into a unique
                # string compatible with the buffer API (required by hashlib)
                f_id = json.dumps(f_id, sort_keys=True).encode("utf-8")

            h = int(self.hash(f_id).hexdigest(), base=16)

            # Pure-int equivalent of np.sign(h); avoids pushing an
            # arbitrarily large Python int through a NumPy ufunc.
            # NOTE(review): h is parsed from a hex digest and so is never
            # negative, meaning this factor is only ever 0 or 1. A signed
            # hashing scheme would need a separate sign bit.
            sign = (h > 0) - (h < 0)
            table[row, h % n_dim] += sign * val

    return table
| 367 | |
| 368 | def _encode_sparse(self, examples): |
| 369 | N = len(examples) |