MCPcopy
hub / github.com/apple/ml-mgie / make_supervised_data_module

Function make_supervised_data_module

mgie_train.py:581–599  ·  view source on GitHub ↗

Make dataset and collator for supervised fine-tuning.

(tokenizer: transformers.PreTrainedTokenizer,
                                data_args)

Source from the content-addressed store, hash-verified

579
580
def make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer,
                                data_args) -> Dict:
    """Build the training dataset and batch collator for supervised fine-tuning.

    Args:
        tokenizer: Tokenizer handed to both the dataset and the collator.
        data_args: Options object. ``lazy_preprocess`` selects the dataset
            class, ``data_path`` is forwarded as the dataset's data path, and
            the remaining fields read below are bundled into the dataset's
            ``multimodal_cfg`` dict.

    Returns:
        A dict with the keys ``train_dataset``, ``eval_dataset`` (always
        ``None``) and ``data_collator``.
    """
    # Choose between the lazy and the regular dataset implementation.
    if data_args.lazy_preprocess:
        dataset_factory = LazySupervisedDataset
    else:
        dataset_factory = SupervisedDataset

    source_path = data_args.data_path

    # These two options are optional on data_args, so they fall back to
    # defaults instead of raising AttributeError.
    mm_settings = {
        'is_multimodal': data_args.is_multimodal,
        'sep_image_conv_front': data_args.sep_image_conv_front,
        'image_token_len': data_args.image_token_len,
        'image_folder': data_args.image_folder,
        'image_aspect_ratio': data_args.image_aspect_ratio,
        'use_im_start_end': getattr(data_args, 'mm_use_im_start_end', False),
        'image_processor': getattr(data_args, 'image_processor', None),
    }

    training_set = dataset_factory(
        tokenizer=tokenizer,
        data_path=source_path,
        multimodal_cfg=mm_settings,
    )
    collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)

    return {
        'train_dataset': training_set,
        'eval_dataset': None,
        'data_collator': collator,
    }
600
601
602def train():

Callers 1

train (Function) · 0.85

Tested by

no test coverage detected