MCPcopy Index your code
hub / github.com/InternLM/lmdeploy / ChatGLM4Tokenizer

Class ChatGLM4Tokenizer

lmdeploy/tokenizer.py:352–371  ·  view source on GitHub ↗

Tokenizer of GLM4.

Source from the content-addressed store, hash-verified

350
351
class ChatGLM4Tokenizer(HuggingFaceTokenizer):
    """Tokenizer of GLM4.

    Differs from the base tokenizer in two ways:

    * the wrapped model's ``_pad`` is replaced by a shim that discards the
      ``padding_side`` keyword before delegating to the original ``_pad``;
    * ``encode`` always tokenizes with ``add_special_tokens=False``.
    """

    def __init__(self, model_path, trust_remote_code: bool = False):
        """Build the base tokenizer, then install the ``_pad`` shim.

        Args:
            model_path: forwarded unchanged to the parent constructor.
            trust_remote_code: forwarded unchanged to the parent constructor.
        """
        super().__init__(model_path, trust_remote_code=trust_remote_code)
        wrapped_pad = self.model._pad

        def pad_without_side(*args, **kwargs):
            # Drop ``padding_side`` when present; every other argument is
            # handed to the original ``_pad`` untouched.
            kwargs.pop('padding_side', None)
            return wrapped_pad(*args, **kwargs)

        # Compatibility fix for transformers>4.45.0.
        # NOTE(review): presumably newer transformers releases pass
        # ``padding_side`` to ``_pad`` and the GLM4 implementation does not
        # accept it -- confirm against the upstream tokenizer code.
        self.model._pad = pad_without_side

    def encode(self, s: str, add_bos: bool = True, add_special_tokens: bool = True, **kwargs):
        """Tokenize a prompt.

        Args:
            s: the prompt text.
            add_bos: forwarded positionally to the parent ``encode``.
            add_special_tokens: accepted for interface compatibility but
                ignored; ``False`` is always forwarded instead.
            **kwargs: forwarded unchanged to the parent ``encode``.

        Returns:
            Whatever the parent class's ``encode`` returns.
        """
        # ChatGLM4Tokenizer hardcodes `add_special_tokens=False` when
        # tokenizing a prompt. Refer to
        # https://huggingface.co/THUDM/glm-4-9b-chat/blob/main/tokenization_chatglm.py#L227 # noqa E501
        return super().encode(s, add_bos, add_special_tokens=False, **kwargs)
372
373
374class ChatGLMTokenizer(HuggingFaceTokenizer):

Callers 1

__init__Method · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected