hub / github.com/huggingface/alignment-handbook / apply_chat_template

Function apply_chat_template

src/alignment/data.py:42–108 · view source on GitHub ↗

(
    example,
    tokenizer,
    task: Literal["sft", "generation", "rm", "dpo"],
    auto_insert_empty_system_msg: bool = True,
)

Source from the content-addressed store, hash-verified

40
41
42	def apply_chat_template(
43	example,
44	tokenizer,
45	task: Literal["sft", "generation", "rm", "dpo"],
46	auto_insert_empty_system_msg: bool = True,
47	):
48	if task in ["sft", "generation"]:
49	messages = example["messages"]
50	# We add an empty system message if there is none
51	if auto_insert_empty_system_msg:
52	maybe_insert_system_message(messages, tokenizer)
53	example["text"] = tokenizer.apply_chat_template(
54	messages,
55	tokenize=False,
56	add_generation_prompt=True if task == "generation" else False,
57	)
58	elif task == "rm":
59	if all(k in example.keys() for k in ("chosen", "rejected")):
60	chosen_messages = example["chosen"]
61	rejected_messages = example["rejected"]
62	# We add an empty system message if there is none
63	if auto_insert_empty_system_msg:
64	maybe_insert_system_message(chosen_messages, tokenizer)
65	maybe_insert_system_message(rejected_messages, tokenizer)
66
67	example["text_chosen"] = tokenizer.apply_chat_template(chosen_messages, tokenize=False)
68	example["text_rejected"] = tokenizer.apply_chat_template(rejected_messages, tokenize=False)
69	else:
70	raise ValueError(
71	f"Could not format example as dialogue for `rm` task! Require `[chosen, rejected]` keys but found {list(example.keys())}"
72	)
73	elif task in ["dpo", "orpo"]:
74	if all(k in example.keys() for k in ("chosen", "rejected")):
75	if not is_openai_format(example["chosen"]) or not is_openai_format(example["rejected"]):
76	raise ValueError(
77	f"Could not format example as dialogue for `{task}` task! Require OpenAI format for all messages"
78	)
79
80	# For DPO/ORPO, the inputs are triples of (prompt, chosen, rejected), where `chosen` and `rejected` are the final turn of a dialogue
81	# We therefore need to extract the N-1 turns to form the prompt
82	if "prompt" in example and is_openai_format(example["prompt"]):
83	prompt_messages = example["prompt"]
84	chosen_messages = example["chosen"]
85	rejected_messages = example["rejected"]
86	else:
87	prompt_messages = example["chosen"][:-1]
88	# Now we extract the final turn to define chosen/rejected responses
89	chosen_messages = example["chosen"][-1:]
90	rejected_messages = example["rejected"][-1:]
91
92	# Prepend a system message if the first message is not a system message
93	if auto_insert_empty_system_msg:
94	maybe_insert_system_message(prompt_messages, tokenizer)
95
96	example["text_prompt"] = tokenizer.apply_chat_template(prompt_messages, tokenize=False)
97	example["text_chosen"] = tokenizer.apply_chat_template(chosen_messages, tokenize=False)
98	example["text_rejected"] = tokenizer.apply_chat_template(rejected_messages, tokenize=False)
99	else:

Callers

nothing calls this directly

Calls 2

maybe_insert_system_message · Function · 0.85

is_openai_format · Function · 0.85

Tested by

no test coverage detected