Runs basic whitespace cleaning and splitting on a piece of text.
(text)
| 78 | |
| 79 | |
def whitespace_tokenize(text):
    """Runs basic whitespace cleaning and splitting on a piece of text.

    Leading and trailing whitespace is discarded and the remainder is split
    on runs of whitespace, so the result never contains empty tokens.

    Args:
        text: The string to tokenize.

    Returns:
        A list of the whitespace-separated tokens in ``text``, in order.
        An empty or whitespace-only ``text`` yields an empty list.
    """
    # With no separator, split() already ignores leading/trailing whitespace
    # and returns [] for empty or all-whitespace input, so no explicit
    # strip() or emptiness check is needed.
    return text.split()
| 87 | |
| 88 | |
| 89 | class BertTokenizer(PreTrainedTokenizer): |