| 63 | |
| 64 | # Get the optimiser object |
def get_optimizer(self, lr, optimizer_type="lamb"):
    """Build the optimiser for ``self.model`` with per-group weight decay.

    Parameters whose name contains ``"bias"`` or ``"LayerNorm.weight"`` are
    placed in a group with ``weight_decay=0.0``; every other parameter is
    placed in a group that uses ``self.weight_decay``.

    Args:
        lr: Learning rate passed to the optimiser constructor.
        optimizer_type: ``"lamb"`` (default) or ``"adamw"``.

    Returns:
        The constructed ``Lamb`` or ``AdamW`` optimiser instance.

    Raises:
        ValueError: If ``optimizer_type`` is not one of the supported names.
    """
    # Fail fast with a clear message; previously an unknown type fell
    # through both branches and surfaced as an UnboundLocalError on return.
    if optimizer_type not in ("lamb", "adamw"):
        raise ValueError(
            f"Unsupported optimizer_type {optimizer_type!r}; "
            "expected 'lamb' or 'adamw'"
        )

    # Name fragments identifying parameters that must not be decayed.
    no_decay = ("bias", "LayerNorm.weight")

    # Partition the parameters in a single pass, preserving their order.
    decay_params = []
    no_decay_params = []
    for name, param in self.model.named_parameters():
        if any(nd in name for nd in no_decay):
            no_decay_params.append(param)
        else:
            decay_params.append(param)

    optimizer_grouped_parameters = [
        {"params": decay_params, "weight_decay": self.weight_decay},
        {"params": no_decay_params, "weight_decay": 0.0},
    ]

    if optimizer_type == "lamb":
        # NOTE(review): both groups set their own "weight_decay", so the
        # constructor-level weight_decay=0.1 presumably only acts as a
        # fallback default -- confirm against the Lamb implementation.
        return Lamb(
            optimizer_grouped_parameters, weight_decay=0.1, lr=lr, eps=1e-12
        )

    return AdamW(optimizer_grouped_parameters, lr=lr, eps=self.adam_epsilon)
| 99 | |
| 100 | # Get learning rate scheduler |
| 101 | def get_scheduler(self, optimizer, t_total, schedule_type="warmup_cosine"): |