(self, sent: str)
| 47 | ) |
| 48 | |
def tokenize(self, sent: str):
    """Tokenize ``sent`` and apply the enabled post-processing steps.

    The sentence is first passed through the callable returned by
    ``self.tokenizer()``. Afterwards, in this fixed order and only when the
    corresponding flag on ``self`` is truthy:

    1. ``punctuation_removal``: the text is handed to
       ``self.remove_punctuation``.
    2. ``character_tokenization``: every occurrence of ``self.SPACE`` is
       replaced with ``self.SPACE_ESCAPE`` and the individual characters
       are then joined with ``self.SPACE``.
    3. ``lowercase``: the text is lowercased.

    Args:
        sent: The raw sentence to tokenize.

    Returns:
        The tokenized and post-processed text.
    """
    base_tokenize = self.tokenizer()
    text = base_tokenize(sent)

    if self.punctuation_removal:
        text = self.remove_punctuation(text)

    if self.character_tokenization:
        separator, escape = self.SPACE, self.SPACE_ESCAPE
        # Escape the existing separators first so they survive as their own
        # tokens once every character is split apart by the separator.
        text = separator.join(text.replace(separator, escape))

    return text.lower() if self.lowercase else text
no test coverage detected