A slower (i.e., non-vectorized) but more straightforward implementation of the gradient computations for a 2D conv layer. Parameters ---------- dLdY : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, out_rows, out_cols, out_ch)` The gradient of the loss with respect to the layer output.
(self, dLdy, retain_grads=True)
| 3116 | return dX, dW, dB |
| 3117 | |
| 3118 | def _backward_naive(self, dLdy, retain_grads=True): |
| 3119 | """ |
| 3120 | A slower (ie., non-vectorized) but more straightforward implementation |
| 3121 | of the gradient computations for a 2D conv layer. |
| 3122 | |
| 3123 | Parameters |
| 3124 | ---------- |
| 3125 | dLdY : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, out_rows, out_cols, out_ch)` |
| 3126 | The gradient of the loss with respect to the layer output. |
| 3127 | |
| 3128 | Returns |
| 3129 | ------- |
| 3130 | dX : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, in_rows, in_cols, in_ch)` |
| 3131 | The gradient of the loss with respect to the layer input volume. |
| 3132 | """ # noqa: E501 |
| 3133 | assert self.trainable, "Layer is frozen" |
| 3134 | if not isinstance(dLdy, list): |
| 3135 | dLdy = [dLdy] |
| 3136 | |
| 3137 | W = self.parameters["W"] |
| 3138 | b = self.parameters["b"] |
| 3139 | Zs = self.derived_variables["Z"] |
| 3140 | |
| 3141 | Xs, d = self.X, self.dilation |
| 3142 | (fr, fc), s, p = self.kernel_shape, self.stride, self.pad |
| 3143 | |
| 3144 | dXs = [] |
| 3145 | for X, Z, dy in zip(Xs, Zs, dLdy): |
| 3146 | n_ex, out_rows, out_cols, out_ch = dy.shape |
| 3147 | X_pad, (pr1, pr2, pc1, pc2) = pad2D(X, p, self.kernel_shape, s, d) |
| 3148 | |
| 3149 | dZ = dLdy * self.act_fn.grad(Z) |
| 3150 | |
| 3151 | dX = np.zeros_like(X_pad) |
| 3152 | dW, dB = np.zeros_like(W), np.zeros_like(b) |
| 3153 | for m in range(n_ex): |
| 3154 | for i in range(out_rows): |
| 3155 | for j in range(out_cols): |
| 3156 | for c in range(out_ch): |
| 3157 | # compute window boundaries w. stride and dilation |
| 3158 | i0, i1 = i * s, (i * s) + fr * (d + 1) - d |
| 3159 | j0, j1 = j * s, (j * s) + fc * (d + 1) - d |
| 3160 | |
| 3161 | wc = W[:, :, :, c] |
| 3162 | kernel = dZ[m, i, j, c] |
| 3163 | window = X_pad[m, i0 : i1 : (d + 1), j0 : j1 : (d + 1), :] |
| 3164 | |
| 3165 | dB[:, :, :, c] += kernel |
| 3166 | dW[:, :, :, c] += window * kernel |
| 3167 | dX[m, i0 : i1 : (d + 1), j0 : j1 : (d + 1), :] += ( |
| 3168 | wc * kernel |
| 3169 | ) |
| 3170 | |
| 3171 | if retain_grads: |
| 3172 | self.gradients["W"] += dW |
| 3173 | self.gradients["b"] += dB |
| 3174 | |
| 3175 | pr2 = None if pr2 == 0 else -pr2 |