Performs the learning rate range test. Arguments: start_lr (float, optional): the starting learning rate for the range test. Default: None (uses the learning rate from the optimizer). end_lr (float, optional): the maximum learning rate to test. Default: 10.
(
self,
start_lr,
end_lr=10,
use_val_loss=True,
optimizer_type="lamb",
num_iter=100,
step_mode="exp",
smooth_f=0.05,
diverge_th=5,
)
| 796 | self.model.to(self.device) |
| 797 | |
| 798 | def lr_find( |
| 799 | self, |
| 800 | start_lr, |
| 801 | end_lr=10, |
| 802 | use_val_loss=True, |
| 803 | optimizer_type="lamb", |
| 804 | num_iter=100, |
| 805 | step_mode="exp", |
| 806 | smooth_f=0.05, |
| 807 | diverge_th=5, |
| 808 | ): |
| 809 | """Performs the learning rate range test. |
| 810 | Arguments: |
| 811 | start_lr (float, optional): the starting learning rate for the range test. |
| 812 | Default: None (uses the learning rate from the optimizer). |
| 813 | end_lr (float, optional): the maximum learning rate to test. Default: 10. |
| 814 | num_iter (int, optional): the number of iterations over which the test |
| 815 | occurs. Default: 100. |
| 816 | step_mode (str, optional): one of the available learning rate policies, |
| 817 | linear or exponential ("linear", "exp"). Default: "exp". |
| 818 | smooth_f (float, optional): the loss smoothing factor within the [0, 1[ |
| 819 | interval. Disabled if set to 0, otherwise the loss is smoothed using |
| 820 | exponential smoothing. Default: 0.05. |
| 821 | diverge_th (int, optional): the test is stopped when the loss surpasses the |
| 822 | threshold: diverge_th * best_loss. Default: 5. |
| 823 | Reference: |
| 824 | [Training Neural Nets on Larger Batches: Practical Tips for 1-GPU, Multi-GPU & Distributed setups]( |
| 825 | https://medium.com/huggingface/ec88c3e51255) |
| 826 | [thomwolf/gradient_accumulation](https://gist.github.com/thomwolf/ac7a7da6b1888c2eeac8ac8b9b05d3d3) |
| 827 | """ |
| 828 | |
| 829 | # Reset test results |
| 830 | self.history = {"lr": [], "loss": []} |
| 831 | self.best_loss = None |
| 832 | self.state_cacher = StateCacher(True, cache_dir=self.output_dir) |
| 833 | |
| 834 | self.optimizer = self.get_optimizer(lr=start_lr, optimizer_type=optimizer_type) |
| 835 | |
| 836 | if hasattr(self.model, "module"): |
| 837 | self.model = self.model.module |
| 838 | |
| 839 | self.state_cacher.store("model", self.model.state_dict()) |
| 840 | self.state_cacher.store("optimizer", self.optimizer.state_dict()) |
| 841 | |
| 842 | # Parallelize the model architecture |
| 843 | if self.multi_gpu is True: |
| 844 | self.model = torch.nn.DataParallel(self.model) |
| 845 | |
| 846 | # Check if the optimizer is already attached to a scheduler |
| 847 | self._check_for_scheduler() |
| 848 | |
| 849 | # Set the starting learning rate |
| 850 | if start_lr: |
| 851 | self._set_learning_rate(start_lr) |
| 852 | |
| 853 | # Initialize the proper learning rate policy |
| 854 | if step_mode.lower() == "exp": |
| 855 | lr_schedule = ExponentialLR(self.optimizer, end_lr, num_iter) |
nothing calls this directly
no test coverage detected