()
| 365 | |
| 366 | |
| 367 | def main(): |
| 368 | |
| 369 | parser = HfArgumentParser((ModelArguments, DataTrainingArguments, MyTrainingArguments)) |
| 370 | if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): |
| 371 | # If we pass only one argument to the script and it's the path to a json file, |
| 372 | # let's parse it to get our arguments. |
| 373 | model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) |
| 374 | else: |
| 375 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() |
| 376 | |
| 377 | # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The |
| 378 | # information sent is the one passed as arguments along with your Python/PyTorch versions. |
| 379 | send_example_telemetry("run_clm", model_args, data_args) |
| 380 | |
| 381 | # Setup logging |
| 382 | logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",datefmt="%m/%d/%Y %H:%M:%S", |
| 383 | level=logging.INFO, # if training_args.local_rank in [-1, 0] else logging.WARN, |
| 384 | handlers=[logging.StreamHandler(sys.stdout)],) |
| 385 | |
| 386 | if training_args.should_log: |
| 387 | # The default of training_args.log_level is passive, so we set log level at info here to have that default. |
| 388 | transformers.utils.logging.set_verbosity_info() |
| 389 | |
| 390 | log_level = training_args.get_process_log_level() |
| 391 | logger.setLevel(log_level) |
| 392 | datasets.utils.logging.set_verbosity(log_level) |
| 393 | transformers.utils.logging.set_verbosity(log_level) |
| 394 | transformers.utils.logging.enable_default_handler() |
| 395 | transformers.utils.logging.enable_explicit_format() |
| 396 | # transformers.tokenization_utils.logging.set_verbosity_warning() |
| 397 | |
| 398 | # Log a short summary on each process: |
| 399 | logger.warning( |
| 400 | f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" |
| 401 | + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" |
| 402 | ) |
| 403 | |
| 404 | # Detecting last checkpoint. |
| 405 | last_checkpoint = None |
| 406 | if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: |
| 407 | last_checkpoint = get_last_checkpoint(training_args.output_dir) |
| 408 | if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: |
| 409 | raise ValueError( |
| 410 | f"Output directory ({training_args.output_dir}) already exists and is not empty. " |
| 411 | "Use --overwrite_output_dir to overcome." |
| 412 | ) |
| 413 | elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: |
| 414 | logger.info( |
| 415 | f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " |
| 416 | "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." |
| 417 | ) |
| 418 | |
| 419 | # Set seed before initializing model. |
| 420 | set_seed(training_args.seed) |
| 421 | |
| 422 | config_kwargs = { |
| 423 | "cache_dir": model_args.cache_dir, |
| 424 | "revision": model_args.model_revision, |
no test coverage detected