| 133 | |
| 134 | |
def save_checkpoint(config, epoch, model, max_accuracy, optimizer, lr_scheduler, loss_scaler, logger):
    """Serialize the training state for ``epoch`` to ``config.OUTPUT``.

    The checkpoint is written with ``torch.save`` to
    ``<config.OUTPUT>/ckpt_epoch_<epoch>.pth`` and holds the ``state_dict()``
    of ``model``, ``optimizer``, ``lr_scheduler`` and ``loss_scaler`` (the
    latter under the key ``'scaler'``), together with ``max_accuracy``,
    ``epoch`` and the ``config`` object itself.

    Args:
        config: Object exposing an ``OUTPUT`` attribute (the target
            directory). It is also stored in the checkpoint, so it must be
            serializable by ``torch.save``.
        epoch: Epoch identifier; used in the file name and stored.
        model: Object providing ``state_dict()``.
        max_accuracy: Value stored unchanged under ``'max_accuracy'``.
        optimizer: Object providing ``state_dict()``.
        lr_scheduler: Object providing ``state_dict()``.
        loss_scaler: Object providing ``state_dict()``.
        logger: Object providing ``info(message)``; called before and after
            the write.
    """
    save_state = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'lr_scheduler': lr_scheduler.state_dict(),
        'max_accuracy': max_accuracy,
        'scaler': loss_scaler.state_dict(),
        'epoch': epoch,
        'config': config,
    }

    # Make sure the target directory exists so a finished epoch is not lost
    # to a missing path. An empty OUTPUT means "current directory", which
    # os.makedirs would reject, so it is skipped in that case.
    if config.OUTPUT:
        os.makedirs(config.OUTPUT, exist_ok=True)

    save_path = os.path.join(config.OUTPUT, f'ckpt_epoch_{epoch}.pth')
    logger.info(f"{save_path} saving......")
    torch.save(save_state, save_path)
    logger.info(f"{save_path} saved !!!")
| 148 | |
| 149 | |
| 150 | def get_grad_norm(parameters, norm_type=2): |