A slower (i.e., non-vectorized) but more straightforward implementation of the gradient computations for a 1D conv layer. Parameters ---------- dLdy : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, l_out, out_ch)` or list of arrays The gradi
(self, dLdy, retain_grads=True)
| 2831 | return np.squeeze(dX, axis=1), np.squeeze(dW, axis=0), dB |
| 2832 | |
| 2833 | def _backward_naive(self, dLdy, retain_grads=True): |
| 2834 | """ |
| 2835 | A slower (i.e., non-vectorized) but more straightforward implementation |
| 2836 | of the gradient computations for a 1D conv layer. |
| 2837 | |
| 2838 | Parameters |
| 2839 | ---------- |
| 2840 | dLdy : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, l_out, out_ch)` or list of arrays |
| 2841 | The gradient(s) of the loss with respect to the layer output(s). |
| 2842 | retain_grads : bool |
| 2843 | Whether to include the intermediate parameter gradients computed |
| 2844 | during the backward pass in the final parameter update. Default is |
| 2845 | True. |
| 2846 | |
| 2847 | Returns |
| 2848 | ------- |
| 2849 | dX : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, l_in, in_ch)` |
| 2850 | The gradient of the loss with respect to the layer input volume. |
| 2851 | """ # noqa: E501 |
| 2852 | assert self.trainable, "Layer is frozen" |
| 2853 | if not isinstance(dLdy, list): |
| 2854 | dLdy = [dLdy] |
| 2855 | |
| 2856 | W = self.parameters["W"] |
| 2857 | b = self.parameters["b"] |
| 2858 | Zs = self.derived_variables["Z"] |
| 2859 | |
| 2860 | Xs, d = self.X, self.dilation |
| 2861 | fw, s, p = self.kernel_width, self.stride, self.pad |
| 2862 | |
| 2863 | dXs = [] |
| 2864 | for X, Z, dy in zip(Xs, Zs, dLdy): |
| 2865 | n_ex, l_out, out_ch = dy.shape |
| 2866 | X_pad, (pr1, pr2) = pad1D(X, p, self.kernel_width, s, d) |
| 2867 | |
| 2868 | dX = np.zeros_like(X_pad) |
| 2869 | dZ = dy * self.act_fn.grad(Z) |
| 2870 | |
| 2871 | dW, dB = np.zeros_like(W), np.zeros_like(b) |
| 2872 | for m in range(n_ex): |
| 2873 | for i in range(l_out): |
| 2874 | for c in range(out_ch): |
| 2875 | # compute window boundaries w. stride and dilation |
| 2876 | i0, i1 = i * s, (i * s) + fw * (d + 1) - d |
| 2877 | |
| 2878 | wc = W[:, :, c] |
| 2879 | kernel = dZ[m, i, c] |
| 2880 | window = X_pad[m, i0 : i1 : (d + 1), :] |
| 2881 | |
| 2882 | dB[:, :, c] += kernel |
| 2883 | dW[:, :, c] += window * kernel |
| 2884 | dX[m, i0 : i1 : (d + 1), :] += wc * kernel |
| 2885 | |
| 2886 | if retain_grads: |
| 2887 | self.gradients["W"] += dW |
| 2888 | self.gradients["b"] += dB |
| 2889 | |
| 2890 | pr2 = None if pr2 == 0 else -pr2 |