Make dataset and collator for supervised fine-tuning.
Signature: make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer, data_args) -> Dict
| 774 | |
| 775 | |
def make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer,
                                data_args) -> Dict:
    """Assemble the dataset and collator used for supervised fine-tuning.

    Args:
        tokenizer: Tokenizer handed to both the dataset and the collator.
        data_args: Data configuration object. Its ``data_path`` attribute is
            read here, and the object itself is forwarded to the dataset.

    Returns:
        A dict with three keys: ``train_dataset`` (the constructed
        ``LazySupervisedDataset``), ``eval_dataset`` (always ``None``) and
        ``data_collator`` (the constructed
        ``DataCollatorForSupervisedDataset``).
    """
    # Build the dataset first, then the collator, mirroring the original order.
    dataset = LazySupervisedDataset(
        tokenizer=tokenizer,
        data_path=data_args.data_path,
        data_args=data_args,
    )
    collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)

    # No evaluation split is produced by this helper.
    return {
        "train_dataset": dataset,
        "eval_dataset": None,
        "data_collator": collator,
    }
| 786 | |
| 787 | |
| 788 | def train(attn_implementation=None): |
no test coverage detected