(self, text)
| 142 | return word |
| 143 | |
def encode(self, text):
    """Turn ``text`` into a flat list of BPE vocabulary entries.

    The input is passed through ``basic_clean`` and ``whitespace_clean``
    and lower-cased, then split into pieces with ``re.findall`` using
    ``self.pat``. Each piece is re-expressed by mapping its UTF-8 bytes
    through ``self.byte_encoder``, merged by ``self.bpe`` and split on
    single spaces; every resulting sub-token is looked up in
    ``self.encoder``.

    Args:
        text: The string to tokenize.

    Returns:
        A list holding the ``self.encoder`` value of every sub-token, in
        the order the pieces appear in the cleaned text.

    Note:
        Lookups in ``self.byte_encoder`` and ``self.encoder`` use plain
        subscripting, so a missing key propagates whatever error those
        mappings raise (presumably ``KeyError`` -- confirm against the
        class initializer).
    """
    normalized = whitespace_clean(basic_clean(text)).lower()
    encoded = []
    for piece in re.findall(self.pat, normalized):
        # Re-spell the piece one UTF-8 byte at a time via the byte table.
        mapped = "".join([self.byte_encoder[byte] for byte in piece.encode("utf-8")])
        # self.bpe yields the merged sub-tokens separated by single spaces.
        for sub_token in self.bpe(mapped).split(" "):
            encoded.append(self.encoder[sub_token])
    return encoded
| 153 | |
| 154 | def decode(self, tokens): |
| 155 | text = "".join([self.decoder[token] for token in tokens]) |
no test coverage detected