`make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer, data_args)`: Make dataset and collator for supervised fine-tuning.
| 233 | |
| 234 | |
def make_supervised_data_module(
    tokenizer: transformers.PreTrainedTokenizer, data_args
) -> Dict:
    """Make the train and (optional) eval datasets for supervised fine-tuning.

    Args:
        tokenizer: Tokenizer forwarded as ``tokenizer=`` to the dataset
            constructor.
        data_args: Object exposing ``lazy_preprocess``, ``data_path`` and
            ``eval_data_path``. ``lazy_preprocess`` selects
            ``LazySupervisedDataset`` over ``SupervisedDataset``.

    Returns:
        A dict with the keys ``train_dataset`` and ``eval_dataset``.
        ``eval_dataset`` is ``None`` when ``data_args.eval_data_path`` is
        falsy. No collator is included in the returned dict.
    """
    dataset_cls = (
        LazySupervisedDataset if data_args.lazy_preprocess else SupervisedDataset
    )
    rank0_print("Loading data...")

    # Read through a context manager so the file handle is closed promptly,
    # even if JSON parsing raises, instead of leaking until garbage collection.
    with open(data_args.data_path, "r") as train_file:
        train_json = json.load(train_file)
    train_dataset = dataset_cls(train_json, tokenizer=tokenizer)

    eval_dataset = None
    if data_args.eval_data_path:
        with open(data_args.eval_data_path, "r") as eval_file:
            eval_json = json.load(eval_file)
        eval_dataset = dataset_cls(eval_json, tokenizer=tokenizer)

    return dict(train_dataset=train_dataset, eval_dataset=eval_dataset)
| 254 | |
| 255 | |
| 256 | def train(): |
no test coverage detected
searching dependent graphs…