MCPcopy
hub / github.com/ymcui/Chinese-LLaMA-Alpaca-2 / main

Function main

scripts/training/run_clm_pt_with_peft.py:367–716  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

365
366
367def main():
368
369 parser = HfArgumentParser((ModelArguments, DataTrainingArguments, MyTrainingArguments))
370 if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
371 # If we pass only one argument to the script and it's the path to a json file,
372 # let's parse it to get our arguments.
373 model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
374 else:
375 model_args, data_args, training_args = parser.parse_args_into_dataclasses()
376
377 # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
378 # information sent is the one passed as arguments along with your Python/PyTorch versions.
379 send_example_telemetry("run_clm", model_args, data_args)
380
381 # Setup logging
382 logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",datefmt="%m/%d/%Y %H:%M:%S",
383 level=logging.INFO, # if training_args.local_rank in [-1, 0] else logging.WARN,
384 handlers=[logging.StreamHandler(sys.stdout)],)
385
386 if training_args.should_log:
387 # The default of training_args.log_level is passive, so we set log level at info here to have that default.
388 transformers.utils.logging.set_verbosity_info()
389
390 log_level = training_args.get_process_log_level()
391 logger.setLevel(log_level)
392 datasets.utils.logging.set_verbosity(log_level)
393 transformers.utils.logging.set_verbosity(log_level)
394 transformers.utils.logging.enable_default_handler()
395 transformers.utils.logging.enable_explicit_format()
396 # transformers.tokenization_utils.logging.set_verbosity_warning()
397
398 # Log on each process the small summary:
399 logger.warning(
400 f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
401 + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
402 )
403
404 # Detecting last checkpoint.
405 last_checkpoint = None
406 if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
407 last_checkpoint = get_last_checkpoint(training_args.output_dir)
408 if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
409 raise ValueError(
410 f"Output directory ({training_args.output_dir}) already exists and is not empty. "
411 "Use --overwrite_output_dir to overcome."
412 )
413 elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
414 logger.info(
415 f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
416 "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
417 )
418
419 # Set seed before initializing model.
420 set_seed(training_args.seed)
421
422 config_kwargs = {
423 "cache_dir": model_args.cache_dir,
424 "revision": model_args.model_revision,

Callers 1

Calls 8

LoraConfig · Class · 0.90
get_peft_model · Function · 0.90
to_dict · Method · 0.80
from_pretrained · Method · 0.45
train · Method · 0.45

Tested by

no test coverage detected