MCPcopy
hub / github.com/ymcui/Chinese-LLaMA-Alpaca-2 / main

Function main

scripts/training/run_clm_sft_with_peft.py:253–509  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

251
252
253def main():
254
255 parser = HfArgumentParser((ModelArguments, DataTrainingArguments, MyTrainingArguments))
256 if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
257 # If we pass only one argument to the script and it's the path to a json file,
258 # let's parse it to get our arguments.
259 model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
260 else:
261 model_args, data_args, training_args = parser.parse_args_into_dataclasses()
262
263 send_example_telemetry("run_clm", model_args, data_args)
264
265 # Setup logging
266 logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",datefmt="%m/%d/%Y %H:%M:%S",
267 level=logging.INFO, # if training_args.local_rank in [-1, 0] else logging.WARN,
268 handlers=[logging.StreamHandler(sys.stdout)],)
269
270
271 if training_args.should_log:
272 # The default of training_args.log_level is passive, so we set log level at info here to have that default.
273 transformers.utils.logging.set_verbosity_info()
274
275 log_level = training_args.get_process_log_level()
276 logger.setLevel(log_level)
277 datasets.utils.logging.set_verbosity(log_level)
278 transformers.utils.logging.set_verbosity(log_level)
279 transformers.utils.logging.enable_default_handler()
280 transformers.utils.logging.enable_explicit_format()
281 # transformers.tokenization_utils.logging.set_verbosity_warning()
282
283 # Log on each process the small summary:
284 logger.warning(
285 f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
286 + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16 or training_args.bf16}"
287 )
288
289 # Detecting last checkpoint.
290 last_checkpoint = None
291 if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
292 last_checkpoint = get_last_checkpoint(training_args.output_dir)
293 if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
294 raise ValueError(
295 f"Output directory ({training_args.output_dir}) already exists and is not empty. "
296 "Use --overwrite_output_dir to overcome."
297 )
298 elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
299 logger.info(
300 f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
301 "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
302 )
303
304 # Set seed before initializing model.
305 set_seed(training_args.seed)
306
307 config_kwargs = {
308 "cache_dir": model_args.cache_dir,
309 "revision": model_args.model_revision,
310 "use_auth_token": True if model_args.use_auth_token else None,

Callers 1

Calls 10

LoraConfig (Class) · 0.90
get_peft_model (Function) · 0.90
to_dict (Method) · 0.80
from_pretrained (Method) · 0.45
train (Method) · 0.45

Tested by

no test coverage detected