r"""Apply loss scaler for manual backward pass. Use this method when calling loss.backward() directly instead of engine.backward(). This applies the appropriate loss scaler for mixed precision training, allowing you to manually control the backward pass while still benefiting from DeepSpeed's gradient scaling functionality.
(self, loss)
| 2797 | optimizer.independent_gradient_partition_epilogue() |
| 2798 | |
def scale(self, loss):
    r"""Return ``loss`` with the engine's loss scaling applied, for manual backward.

    This is the entry point for callers that want to invoke ``.backward()``
    on the loss themselves rather than going through ``engine.backward()``.
    Which scaler is applied depends on the configured optimizer:

    * a ``ZeROOptimizer`` scales the loss through its ``scale_if_loss`` method;
    * otherwise, if ``torch_autocast_z0_gradscaler`` is set, its ``scale``
      method is used;
    * otherwise the loss is returned unchanged.

    Example::

        output = engine(input)
        loss = criterion(output, target)
        scaled_loss = engine.scale(loss)
        scaled_loss.backward()  # Manual backward call
        engine.step()

    Arguments:
        loss: Scalar loss tensor to be scaled

    Returns:
        The (possibly scaled) loss tensor, ready for a ``.backward()`` call

    Raises:
        AssertionError: If no real optimizer is configured (``None`` or a
            ``DummyOptim``), or if ``maybe_loss_for_backward(loss)`` rejects
            the loss (it must be a scalar tensor with grad_fn).
        RuntimeError: If AMP (NVIDIA Apex) is enabled. AMP wraps scaling and
            backward in a single context manager, so the two steps cannot be
            split; use ``engine.backward()`` in that case.
    """
    # Preconditions: a usable optimizer first, then a loss that can be backpropagated.
    assert not (self.optimizer is None or isinstance(self.optimizer, DummyOptim)), \
        "must provide optimizer during init in order to use scale"
    assert maybe_loss_for_backward(loss), \
        "loss must be a scalar tensor with grad_fn. For non-scalar tensors, use tensor.backward(grad)"

    # Apex AMP couples loss scaling with the backward call inside one context
    # manager, which makes a stand-alone scale() impossible to support.
    if self.amp_enabled():
        raise RuntimeError("engine.scale() is not compatible with AMP (NVIDIA Apex). "
                           "When using AMP, you must call engine.backward(loss) instead of manual backward.")

    # Choose the scaler that matches the optimizer type; fall back to the raw loss.
    if isinstance(self.optimizer, ZeROOptimizer):
        result = self.optimizer.scale_if_loss(loss)
    elif not self.torch_autocast_z0_gradscaler:
        result = loss
    else:
        result = self.torch_autocast_z0_gradscaler.scale(loss)

    # Record that scale() ran so the backward hook can validate the manual path.
    self._manual_backward_expected = True

    return result
| 2850 | |
| 2851 | @instrument_w_nvtx |
| 2852 | def backward(self, loss, retain_graph=False, scale_wrt_gas=True): |