(self, text)
| 117 | return word |
| 118 | |
def encode(self, text):
    """Convert ``text`` into a flat list of BPE vocabulary entries.

    The input is passed through ``basic_clean`` and then
    ``whitespace_clean``, lower-cased, and split into chunks with
    ``self.pat``.  Each chunk is UTF-8 encoded, every byte is mapped
    through ``self.byte_encoder``, and the joined result is handed to
    ``self.bpe``.  The space-separated pieces that ``self.bpe`` returns
    are looked up in ``self.encoder`` and collected in order.

    Args:
        text: The string to encode.

    Returns:
        A list holding one ``self.encoder`` value per BPE piece, in the
        order the pieces were produced.
    """
    normalized = basic_clean(text)
    normalized = whitespace_clean(normalized).lower()

    token_ids = []
    for chunk in re.findall(self.pat, normalized):
        # Work on the byte-level representation so every chunk can be
        # expressed with the symbols known to ``self.byte_encoder``.
        raw_bytes = chunk.encode('utf-8')
        mapped = ''.join([self.byte_encoder[byte] for byte in raw_bytes])

        # ``self.bpe`` returns its merged pieces joined by single spaces.
        for piece in self.bpe(mapped).split(' '):
            token_ids.append(self.encoder[piece])
    return token_ids
| 126 | |
| 127 | def decode(self, tokens, remove_start_end = True, pad_tokens = set()): |
| 128 | if torch.is_tensor(tokens): |
no test coverage detected