(args)
| 347 | return optimizer_grouped_parameters |
| 348 | |
def prepare_model_optimizer(args):
    """Create the BertMultiTask model and hand its network to DeepSpeed.

    The DeepSpeed initializer handles FP16, distributed setup and the
    optimizer automatically; afterwards the engine's settings are copied
    back onto ``args`` so the rest of the application sees the values
    DeepSpeed actually uses.

    Args:
        args: Run configuration object. It is passed to the model, the
            optimizer-parameter helper and ``deepspeed.initialize``.
            ``job_name``, ``output_dir`` and ``saved_model_path`` are read
            on rank 0. The attributes ``train_micro_batch_size_per_gpu``,
            ``gradient_accumulation_steps``, ``local_rank``, ``device``,
            ``fp16`` and ``use_lamb`` are overwritten on every rank;
            ``summary_writer`` is set on rank 0 only.

    Returns:
        A ``(model, optimizer)`` tuple, where ``model.network`` has been
        replaced by the first object returned from ``deepspeed.initialize``.
    """
    model = BertMultiTask(args)
    param_groups = prepare_optimizer_parameters(args, model)

    # Only the first two results of the initializer are used here.
    engine, optimizer, _, _ = deepspeed.initialize(
        args=args,
        model=model.network,
        model_parameters=param_groups,
    )
    model.network = engine

    # The DeepSpeed config wins over whatever the application was given.
    args.train_micro_batch_size_per_gpu = engine.train_micro_batch_size_per_gpu()
    args.gradient_accumulation_steps = engine.gradient_accumulation_steps()

    # Mirror the engine's placement / precision / optimizer info onto args.
    args.local_rank = engine.local_rank
    args.device = engine.device
    model.set_device(args.device)
    args.fp16 = engine.fp16_enabled()
    lamb_name = deepspeed.pt.deepspeed_config.LAMB_OPTIMIZER
    args.use_lamb = engine.optimizer_name() == lamb_name

    # Every rank other than 0 is done at this point.
    if dist.get_rank() != 0:
        return model, optimizer

    # Rank 0 owns the summary writer and the saved_models directory.
    # NOTE(review): the directory creation is assumed to belong to the
    # rank-0 branch together with the summary writer -- confirm.
    args.summary_writer = get_sample_writer(name=args.job_name,
                                            base=args.output_dir)
    os.makedirs(args.saved_model_path, exist_ok=True)

    return model, optimizer
| 379 | |
| 380 | def load_checkpoint(args, model): |
| 381 | global global_step |
no test coverage detected