(self, text)
| 135 | return word |
| 136 | |
def encode(self, text):
    """Turn a text string into a list of BPE token ids.

    The input is run through ``basic_clean`` and ``whitespace_clean`` and
    lower-cased, then split into pieces with the ``self.pat`` pattern.
    Each piece is re-spelled by mapping every byte of its UTF-8 encoding
    through ``self.byte_encoder``, merged by ``self.bpe`` (which returns
    space-separated sub-tokens), and each sub-token is looked up in
    ``self.encoder``.

    Args:
        text: The string to tokenize.

    Returns:
        A list holding the ``self.encoder`` value of every sub-token,
        in order of appearance.

    Raises:
        KeyError: If a byte or a merged sub-token is missing from the
            corresponding lookup table.
    """
    normalized = whitespace_clean(basic_clean(text)).lower()
    token_ids = []
    for piece in re.findall(self.pat, normalized):
        # Re-express the piece via the byte-level alphabet before merging.
        raw_bytes = piece.encode('utf-8')
        mapped = ''.join([self.byte_encoder[b] for b in raw_bytes])
        for sub_token in self.bpe(mapped).split(' '):
            token_ids.append(self.encoder[sub_token])
    return token_ids
| 144 | |
| 145 | def decode(self, tokens): |
| 146 | text = ''.join([self.decoder[token] for token in tokens]) |
no test coverage detected