| 157 | |
| 158 | |
def create_optimizer(opt, model, lr, weight_decay, get_num_layer=None, get_layer_scale=None):
    """Create an optimizer over all parameters of ``model``.

    The optimizer is selected by name, case-insensitively. Only the segment
    after the last underscore in ``opt`` is used for the lookup, so any
    ``prefix_`` in front of the optimizer name is ignored.

    Args:
        opt: Optimizer name; the final ``_``-separated segment must be one of
            ``"adam"``, ``"adamw"``, ``"adadelta"`` or ``"sgd"``.
        model: Object exposing ``parameters()``; its result is handed to the
            optimizer constructor.
        lr: Learning rate forwarded to the optimizer.
        weight_decay: Weight decay forwarded to the optimizer.
        get_num_layer: Accepted for interface compatibility; not used here.
        get_layer_scale: Accepted for interface compatibility; not used here.

    Returns:
        The constructed optimizer instance from the ``optim`` module.

    Raises:
        NotImplementedError: If the optimizer name is not supported.
    """
    # Keep only the trailing segment: "foo_adamw" selects "adamw".
    opt_name = opt.lower().split("_")[-1]

    optimizer_classes = {
        "adam": optim.Adam,
        "adamw": optim.AdamW,
        "adadelta": optim.Adadelta,
        "sgd": optim.SGD,
    }
    if opt_name not in optimizer_classes:
        supported = ", ".join(sorted(optimizer_classes))
        raise NotImplementedError(
            f"Invalid optimizer: {opt!r} (supported: {supported})"
        )

    opt_args = dict(lr=lr, weight_decay=weight_decay)
    if opt_name == "sgd":
        # SGD is always created with a fixed momentum of 0.9.
        opt_args["momentum"] = 0.9

    return optimizer_classes[opt_name](model.parameters(), **opt_args)
| 180 | |
| 181 | |
| 182 | def show_occupied_memory(): |