MCPcopy
hub / github.com/huggingface/alignment-handbook / get_tokenizer

Function get_tokenizer

src/alignment/model_utils.py:66–94  ·  view source on GitHub ↗

Get the tokenizer for the model.

(
    model_args: ModelArguments, data_args: DataArguments, auto_set_chat_template: bool = True
)

Source from the content-addressed store, hash-verified

64
65
def get_tokenizer(
    model_args: ModelArguments, data_args: DataArguments, auto_set_chat_template: bool = True
) -> PreTrainedTokenizer:
    """Load the tokenizer for the model and apply project defaults.

    Args:
        model_args: Supplies ``tokenizer_name_or_path`` (used when not ``None``,
            otherwise ``model_name_or_path`` is used), ``model_revision`` and
            ``trust_remote_code``, all forwarded to ``AutoTokenizer.from_pretrained``.
        data_args: Supplies the optional ``truncation_side`` and ``chat_template``
            overrides; each is applied only when it is not ``None``.
        auto_set_chat_template: When ``True`` and ``data_args.chat_template`` is
            ``None``, install ``DEFAULT_CHAT_TEMPLATE`` on a tokenizer that has no
            chat template of its own.

    Returns:
        The loaded tokenizer with pad token, truncation side, maximum length and
        chat template adjusted as described above.
    """
    tokenizer_source = (
        model_args.model_name_or_path
        if model_args.tokenizer_name_or_path is None
        else model_args.tokenizer_name_or_path
    )
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_source,
        revision=model_args.model_revision,
        trust_remote_code=model_args.trust_remote_code,
    )

    # Reuse the EOS token for padding when the tokenizer defines no pad token.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    if data_args.truncation_side is not None:
        tokenizer.truncation_side = data_args.truncation_side

    # Set reasonable default for models without max length
    if tokenizer.model_max_length > 100_000:
        tokenizer.model_max_length = 2048

    if data_args.chat_template is not None:
        # An explicitly configured template always wins.
        tokenizer.chat_template = data_args.chat_template
    elif auto_set_chat_template:
        try:
            existing_template = tokenizer.get_chat_template()
        except ValueError:
            # Recent transformers releases raise ValueError here instead of
            # returning None when no usable template is set. Fall back to the
            # raw attribute so that an unset template is detected, while a
            # tokenizer that does carry templates is left untouched.
            existing_template = tokenizer.chat_template
        if existing_template is None:
            tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE

    return tokenizer
95
96
97def get_peft_config(model_args: ModelArguments) -> PeftConfig | None:

Callers 9

mainFunction · 0.90
mainFunction · 0.90
mainFunction · 0.90
mainFunction · 0.90
setUpMethod · 0.90

Calls

no outgoing calls

Tested by 5

setUpMethod · 0.72