A list of the learned byte pair codewords, decoded into human-readable format
(self)
| 375 | |
@property
def codebook(self):
    """
    A list of the learned byte pair codewords, decoded into human-readable
    format.

    Returns
    -------
    codewords : list
        One entry per tuple-typed key of ``self.byte2token``, in the
        mapping's iteration order. Each entry is the first element of the
        value returned by ``self.inverse_transform`` for that key.
    """
    # Only tuple keys are decoded; every other key is skipped.
    # NOTE(review): presumably tuple keys are the learned multi-byte
    # codewords and the remaining keys are single bytes -- confirm against
    # the code that populates `byte2token`.
    return [
        # `inverse_transform` returns an indexable result; only its first
        # element is kept for each key.
        self.inverse_transform(key)[0]
        for key in self.byte2token
        if isinstance(key, tuple)
    ]
| 387 | |
| 388 | @property |
| 389 | def tokens(self): |