MCPcopy Index your code
hub / github.com/deepspeedai/DeepSpeed / scale

Method scale

deepspeed/runtime/engine.py:2799–2849  ·  view source on GitHub ↗

r"""Apply loss scaler for manual backward pass. Use this method when calling loss.backward() directly instead of engine.backward(). This applies the appropriate loss scaler for mixed precision training, allowing you to manually control the backward pass while still benefiting from DeepSpeed's gradient scaling functionality.

(self, loss)

Source from the content-addressed store, hash-verified

2797 optimizer.independent_gradient_partition_epilogue()
2798
def scale(self, loss):
    r"""Apply loss scaler for manual backward pass.

    Use this method when calling loss.backward() directly instead of engine.backward().
    This applies the appropriate loss scaler for mixed precision training, allowing you
    to manually control the backward pass while still benefiting from DeepSpeed's
    gradient scaling functionality.

    As a side effect, this sets ``self._manual_backward_expected = True`` so that a
    later validation step can tell that ``scale()`` was called.

    Example::

        output = engine(input)
        loss = criterion(output, target)
        scaled_loss = engine.scale(loss)
        scaled_loss.backward()  # Manual backward call
        engine.step()

    Arguments:
        loss: Scalar loss tensor to be scaled

    Returns:
        Scaled loss tensor ready for .backward() call. The input ``loss`` is returned
        unchanged when the optimizer is not a ``ZeROOptimizer`` and
        ``self.torch_autocast_z0_gradscaler`` is falsy.

    Raises:
        RuntimeError: If AMP (NVIDIA Apex) is enabled. AMP requires using engine.backward()
            directly as it uses a context manager that cannot be separated from
            the backward call.
        AssertionError: If loss is not a scalar tensor with grad_fn, or if no optimizer
            is configured.
    """
    # These preconditions are raised explicitly instead of via ``assert`` statements:
    # ``assert`` is removed when Python runs with -O, which would silently disable the
    # documented checks. AssertionError is kept so existing callers see the same type.
    if self.optimizer is None or isinstance(self.optimizer, DummyOptim):
        raise AssertionError("must provide optimizer during init in order to use scale")
    if not maybe_loss_for_backward(loss):
        raise AssertionError(
            "loss must be a scalar tensor with grad_fn. For non-scalar tensors, use tensor.backward(grad)")

    # AMP (NVIDIA Apex) uses a context manager that wraps both scaling and backward,
    # so it cannot be used with manual backward calls
    if self.amp_enabled():
        raise RuntimeError("engine.scale() is not compatible with AMP (NVIDIA Apex). "
                           "When using AMP, you must call engine.backward(loss) instead of manual backward.")

    # Apply loss scaler based on optimizer type; with neither scaler available the
    # loss passes through untouched.
    scaled_loss = loss
    if isinstance(self.optimizer, ZeROOptimizer):
        scaled_loss = self.optimizer.scale_if_loss(scaled_loss)
    elif self.torch_autocast_z0_gradscaler:
        scaled_loss = self.torch_autocast_z0_gradscaler.scale(scaled_loss)

    # Mark that scale() was called for validation in backward hook
    self._manual_backward_expected = True

    return scaled_loss
2850
2851 @instrument_w_nvtx
2852 def backward(self, loss, retain_graph=False, scale_wrt_gas=True):

Calls 3

amp_enabledMethod · 0.95
maybe_loss_for_backwardFunction · 0.90
scale_if_lossMethod · 0.80