Propagates the gradient of a tensor backwards through the computation graph.
If the 'gradient' argument is not provided, the tensor must be a scalar, and the gradient is implicitly set to 1.0.

```python exec="true" source="above" session="tensor" result="python"
t = Tensor([1.0, 2.0, 3.0, 4.0])
t.sum().backward()
print(t.grad.numpy())
```
(self, gradient:Tensor|None=None)
| 850 | return ret |
| 851 | |
def backward(self, gradient:Tensor|None=None) -> Tensor:
  """
  Runs backpropagation from this tensor through the computation graph.

  When the 'gradient' argument is omitted, this tensor must be a scalar and the gradient is implicitly set to 1.0.
  For every selected tensor, `.grad` is assigned when it is currently None; otherwise the new gradient is added to it.
  Returns `self`.

  ```python exec="true" source="above" session="tensor" result="python"
  t = Tensor([1.0, 2.0, 3.0, 4.0])
  t.sum().backward()
  print(t.grad.numpy())
  ```
  """
  graph = self.uop.toposort()
  # select every tracked tensor that still resolves, belongs to this graph, is floating point and is not a CONST
  targets: list[Tensor] = []
  for ref in all_tensors:
    candidate = ref()
    if candidate is None: continue  # reference no longer resolves to a tensor
    if candidate.uop not in graph: continue
    if not candidate.is_floating_point(): continue
    if candidate.uop.op is Ops.CONST: continue
    targets.append(candidate)
  # one gradient per selected tensor, in the same order as `targets`
  grads = self.gradient(*targets, gradient=gradient)
  for tensor, grad in zip(targets, grads):
    assert grad.shape == tensor.shape, f"grad shape must match tensor shape, {grad.shape!r} != {tensor.shape!r}"
    if tensor.grad is not None:
      # accumulate into the existing grad, moving the new gradient to the existing grad's device first
      tensor.grad.assign(tensor.grad + grad.to(tensor.grad.device))
    else:
      tensor.grad = grad
  return self
| 872 | |
| 873 | # ***** movement ops ***** |
| 874 |