Format example to single- or multi-turn dialogue.
(example, dialogue_template, is_train=True)
| 203 | |
| 204 | |
def prepare_dialogue(example, dialogue_template, is_train=True):
    """Render ``example`` as dialogue text and store it in ``example["text"]``.

    The conversation is taken from the first source that applies:

    1. ``example["messages"]``, when the key exists and its value is not
       ``None`` (used as-is);
    2. ``example["prompt"]`` plus ``example["completion"]``, turned into a
       user turn followed by an assistant turn;
    3. ``example["prompt"]`` alone, turned into a single user turn
       (intended for inference).

    Args:
        example: Mapping holding the raw record. It is modified in place:
            a ``"text"`` entry is added or overwritten.
        dialogue_template: Object with a writable ``messages`` attribute and
            ``get_training_prompt()`` / ``get_inference_prompt()`` methods.
            Its ``messages`` attribute is overwritten on every call.
        is_train: When true the training prompt is rendered, otherwise the
            inference prompt.

    Returns:
        The same ``example`` object, now carrying the rendered ``"text"``.

    Raises:
        ValueError: If ``example`` has neither usable ``messages`` nor a
            ``prompt`` key.
    """
    # TODO: make this simpler by just ensuring every dataset has a messages column
    columns = example.keys()

    if "messages" in columns and example["messages"] is not None:
        turns = example["messages"]
    elif "prompt" in columns:
        # A prompt always yields the user turn; the assistant turn is only
        # appended when a completion column is present as well.
        turns = [{"role": "user", "content": example["prompt"]}]
        if "completion" in columns:
            turns.append({"role": "assistant", "content": example["completion"]})
    else:
        raise ValueError(
            f"Could not format example as dialogue! Require either `messages` or `[prompt, completion]` or `[prompt]` keys but found {list(example.keys())}"
        )

    # The template renders from its own state, so the turns must be installed
    # before either prompt getter is called.
    dialogue_template.messages = turns
    render = (
        dialogue_template.get_training_prompt
        if is_train
        else dialogue_template.get_inference_prompt
    )
    example["text"] = render()
    return example
| 230 | |
| 231 | |
| 232 | def mask_user_labels(tokenizer, dialogue_template, labels): |
nothing calls this directly
no test coverage detected