MCPcopy
hub / github.com/huggingface/alignment-handbook / apply_chat_template

Function apply_chat_template

src/alignment/data.py:42–108  ·  view source on GitHub ↗
(
    example,
    tokenizer,
    task: Literal["sft", "generation", "rm", "dpo"],
    auto_insert_empty_system_msg: bool = True,
)

Source from the content-addressed store, hash-verified

40
41
42def apply_chat_template(
43 example,
44 tokenizer,
45 task: Literal["sft", "generation", "rm", "dpo"],
46 auto_insert_empty_system_msg: bool = True,
47):
48 if task in ["sft", "generation"]:
49 messages = example["messages"]
50 # We add an empty system message if there is none
51 if auto_insert_empty_system_msg:
52 maybe_insert_system_message(messages, tokenizer)
53 example["text"] = tokenizer.apply_chat_template(
54 messages,
55 tokenize=False,
56 add_generation_prompt=True if task == "generation" else False,
57 )
58 elif task == "rm":
59 if all(k in example.keys() for k in ("chosen", "rejected")):
60 chosen_messages = example["chosen"]
61 rejected_messages = example["rejected"]
62 # We add an empty system message if there is none
63 if auto_insert_empty_system_msg:
64 maybe_insert_system_message(chosen_messages, tokenizer)
65 maybe_insert_system_message(rejected_messages, tokenizer)
66
67 example["text_chosen"] = tokenizer.apply_chat_template(chosen_messages, tokenize=False)
68 example["text_rejected"] = tokenizer.apply_chat_template(rejected_messages, tokenize=False)
69 else:
70 raise ValueError(
71 f"Could not format example as dialogue for `rm` task! Require `[chosen, rejected]` keys but found {list(example.keys())}"
72 )
73 elif task in ["dpo", "orpo"]:
74 if all(k in example.keys() for k in ("chosen", "rejected")):
75 if not is_openai_format(example["chosen"]) or not is_openai_format(example["rejected"]):
76 raise ValueError(
77 f"Could not format example as dialogue for `{task}` task! Require OpenAI format for all messages"
78 )
79
80 # For DPO/ORPO, the inputs are triples of (prompt, chosen, rejected), where `chosen` and `rejected` are the final turn of a dialogue
81 # We therefore need to extract the N-1 turns to form the prompt
82 if "prompt" in example and is_openai_format(example["prompt"]):
83 prompt_messages = example["prompt"]
84 chosen_messages = example["chosen"]
85 rejected_messages = example["rejected"]
86 else:
87 prompt_messages = example["chosen"][:-1]
88 # Now we extract the final turn to define chosen/rejected responses
89 chosen_messages = example["chosen"][-1:]
90 rejected_messages = example["rejected"][-1:]
91
92 # Prepend a system message if the first message is not a system message
93 if auto_insert_empty_system_msg:
94 maybe_insert_system_message(prompt_messages, tokenizer)
95
96 example["text_prompt"] = tokenizer.apply_chat_template(prompt_messages, tokenize=False)
97 example["text_chosen"] = tokenizer.apply_chat_template(chosen_messages, tokenize=False)
98 example["text_rejected"] = tokenizer.apply_chat_template(rejected_messages, tokenize=False)
99 else:

Callers

nothing calls this directly

Calls 2

is_openai_format · Function · 0.85

Tested by

no test coverage detected