Instantiate a BertTokenizer from pre-trained vocabulary files.
(cls, pretrained_model_name_or_path, *inputs, **kwargs)
| 183 | |
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
    """Build a BertTokenizer from pre-trained vocabulary files.

    When the requested name is one of the keys of
    ``PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES``, the ``do_lower_case`` keyword
    is reconciled with the casing implied by the name: names containing
    ``-cased`` force ``do_lower_case=False``, all other known names force
    ``do_lower_case=True``. A warning is logged whenever the value is
    overridden. ``do_lower_case`` is treated as True when it is not supplied.

    Args:
        pretrained_model_name_or_path: Name or path handed on unchanged to
            the parent class's ``_from_pretrained``.
        *inputs: Extra positional arguments forwarded as-is.
        **kwargs: Extra keyword arguments forwarded after the optional
            ``do_lower_case`` correction.

    Returns:
        Whatever the parent class's ``_from_pretrained`` returns.
    """
    if pretrained_model_name_or_path in PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES:
        # Evaluate both sides of the consistency check once, up front.
        name_is_cased = '-cased' in pretrained_model_name_or_path
        lower_casing_requested = kwargs.get('do_lower_case', True)

        if name_is_cased and lower_casing_requested:
            # Cased checkpoint but lower-casing is (implicitly) enabled.
            logger.warning("The pre-trained model you are loading is a cased model but you have not set "
                           "`do_lower_case` to False. We are setting `do_lower_case=False` for you but "
                           "you may want to check this behavior.")
            kwargs['do_lower_case'] = False
        elif not name_is_cased and not lower_casing_requested:
            # Uncased checkpoint but lower-casing was explicitly disabled.
            logger.warning("The pre-trained model you are loading is an uncased model but you have set "
                           "`do_lower_case` to False. We are setting `do_lower_case=True` for you "
                           "but you may want to check this behavior.")
            kwargs['do_lower_case'] = True

    return super(BertTokenizer, cls)._from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
| 201 | |
| 202 | |
| 203 | class BasicTokenizer(object): |