MCPcopy
hub / github.com/Yuliang-Liu/Monkey / main

Function main

project/mini_monkey/internvl/train/minimonkey_chat_finetune.py:606–846  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

604
605
606def main():
607 # Parse input arguments
608 # See all possible arguments in src/transformers/training_args.py
609 # If using DeepSpeed ZeRO-3, init_dist must be called before HfArgumentParser
610 launcher = os.environ.get('LAUNCHER', 'slurm')
611 init_dist(launcher=launcher, backend='nccl')
612 parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
613 if len(sys.argv) == 2 and sys.argv[1].endswith('.json'):
614 # If we pass only one argument to the script, and it's the path to a json file,
615 # let's parse it to get our arguments.
616 model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
617 else:
618 model_args, data_args, training_args = parser.parse_args_into_dataclasses()
619
620 # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
621 # information sent is the one passed as arguments along with your Python/PyTorch versions.
622 # send_example_telemetry('InternV-Chat', model_args, data_args)
623
624 # Setup logging
625 logging.basicConfig(
626 format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
627 datefmt='%m/%d/%Y %H:%M:%S',
628 handlers=[logging.StreamHandler(sys.stdout)],
629 )
630
631 if training_args.should_log:
632 # The default of training_args.log_level is passive, so we set log level at info here to have that default.
633 transformers.utils.logging.set_verbosity_info()
634
635 log_level = training_args.get_process_log_level()
636 logger.setLevel(log_level)
637 set_verbosity(log_level)
638 enable_default_handler()
639 enable_explicit_format()
640
641 # Log on each process the small summary:
642 logger.warning(
643 f'Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}'
644 + f'distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}'
645 )
646 logger.info(f'Training/evaluation parameters {training_args}')
647
648 # Detect the last checkpoint and, if there is one, continue from it.
649 last_checkpoint = None
650 if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
651 last_checkpoint = get_last_checkpoint(training_args.output_dir)
652 if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
653 raise ValueError(
654 f'Output directory ({training_args.output_dir}) already exists and is not empty. '
655 'Use --overwrite_output_dir to overcome.'
656 )
657 elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
658 logger.info(
659 f'Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change '
660 'the `--output_dir` or add `--overwrite_output_dir` to train from scratch.'
661 )
662 # Set seed before initializing model.
663 set_seed(training_args.seed)

Callers 1

Calls 15

wrap_backbone_loraMethod · 0.95
wrap_llm_loraMethod · 0.95
init_distFunction · 0.90
TCSLoaderClass · 0.90
InternVLChatConfigClass · 0.90
MiniMonkeyChatModelClass · 0.90
replace_create_optimizerFunction · 0.90
TrainerClass · 0.85
infoMethod · 0.80
convert_tokens_to_idsMethod · 0.80
resize_pos_embeddingsMethod · 0.80
build_datasetsFunction · 0.70

Tested by

no test coverage detected