Method from_pretrained

tokenizations/tokenization_bert.py:185–200 · view source on GitHub ↗

Instantiate a BertTokenizer from pre-trained vocabulary files.

(cls, pretrained_model_name_or_path, *inputs, **kwargs)

Source from the content-addressed store, hash-verified

183
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
    """Instantiate a BertTokenizer from pre-trained vocabulary files.

    When ``pretrained_model_name_or_path`` is one of the keys of
    ``PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES``, the ``do_lower_case`` keyword
    argument is reconciled with the model name before loading: a name
    containing ``-cased`` forces ``do_lower_case=False``, any other known
    name forces ``do_lower_case=True``. A warning is logged whenever the
    caller's (explicit or implied) setting is overridden. An absent
    ``do_lower_case`` is treated as ``True`` for this check.

    Args:
        pretrained_model_name_or_path: Model identifier or path, forwarded
            unchanged to the parent class loader.
        *inputs: Extra positional arguments forwarded to the parent loader.
        **kwargs: Extra keyword arguments forwarded to the parent loader;
            ``do_lower_case`` may be overwritten as described above.

    Returns:
        Whatever the parent class's ``_from_pretrained`` returns.
    """
    if pretrained_model_name_or_path in PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES:
        is_cased_name = '-cased' in pretrained_model_name_or_path
        # A missing `do_lower_case` counts as True, so a cased model loaded
        # without an explicit setting is also corrected (and warned about).
        wants_lower_case = kwargs.get('do_lower_case', True)
        if is_cased_name and wants_lower_case:
            logger.warning("The pre-trained model you are loading is a cased model but you have not set "
                           "`do_lower_case` to False. We are setting `do_lower_case=False` for you but "
                           "you may want to check this behavior.")
            kwargs['do_lower_case'] = False
        elif not is_cased_name and not wants_lower_case:
            logger.warning("The pre-trained model you are loading is an uncased model but you have set "
                           "`do_lower_case` to False. We are setting `do_lower_case=True` for you "
                           "but you may want to check this behavior.")
            kwargs['do_lower_case'] = True

    # Unpack both the positional and keyword extras so the parent loader
    # receives them exactly as the caller supplied them.
    return super(BertTokenizer, cls)._from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
201
202
203	class BasicTokenizer(object):

mainFunction · 0.45

no outgoing calls

no test coverage detected