MCPcopy
hub / github.com/Morizeyao/GPT2-Chinese / from_pretrained

Method from_pretrained

tokenizations/tokenization_bert.py:185–200  ·  view source on GitHub ↗

Instantiate a BertTokenizer from pre-trained vocabulary files.

(cls, pretrained_model_name_or_path, *inputs, **kwargs)

Source from the content-addressed store, hash-verified

183
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
    """Instantiate a BertTokenizer from pre-trained vocabulary files.

    When ``pretrained_model_name_or_path`` is one of the keys of
    ``PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES``, the ``do_lower_case`` keyword
    argument (treated as ``True`` when absent) is reconciled with the model
    name: names containing ``'-cased'`` force ``do_lower_case=False``, all
    other known names force ``do_lower_case=True``. A warning is logged
    whenever the caller's setting is overridden.

    All positional and keyword arguments are then forwarded unchanged
    (apart from the possible ``do_lower_case`` override) to the parent
    class's ``_from_pretrained``.
    """
    # Only names registered in the known-model table are checked; any other
    # value is passed through untouched.
    if pretrained_model_name_or_path in PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES:
        is_cased_model = '-cased' in pretrained_model_name_or_path
        lower_casing_requested = kwargs.get('do_lower_case', True)

        if is_cased_model and lower_casing_requested:
            # Cased checkpoint, but lower-casing is (implicitly or explicitly) on.
            logger.warning("The pre-trained model you are loading is a cased model but you have not set "
                           "`do_lower_case` to False. We are setting `do_lower_case=False` for you but "
                           "you may want to check this behavior.")
            kwargs['do_lower_case'] = False
        elif not is_cased_model and not lower_casing_requested:
            # Uncased checkpoint, but lower-casing was switched off.
            logger.warning("The pre-trained model you are loading is an uncased model but you have set "
                           "`do_lower_case` to False. We are setting `do_lower_case=True` for you "
                           "but you may want to check this behavior.")
            kwargs['do_lower_case'] = True

    return super(BertTokenizer, cls)._from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
201
202
203class BasicTokenizer(object):

Callers 5

mainFunction · 0.45
mainFunction · 0.45
mainFunction · 0.45
mainFunction · 0.45
mainFunction · 0.45

Calls

no outgoing calls

Tested by

no test coverage detected