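Backprop from layer outputs to inputs. Parameters ---------- dLdY : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, out_rows, out_cols, out_ch)` The gradient of the loss wrt. the layer output `Y`. retain_grads : bool Whether to include the intermediate parameter gradients computed during the backward pass in the final parameter update. Default is True.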
(self, dLdY, retain_grads=True)
| 3288 | return Y |
| 3289 | |
| 3290 | def backward(self, dLdY, retain_grads=True): |
| 3291 | """ |
| 3292 | Backprop from layer outputs to inputs |
| 3293 | |
| 3294 | Parameters |
| 3295 | ---------- |
| 3296 | dLdY : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, out_rows, out_cols, out_ch)` |
| 3297 | The gradient of the loss wrt. the layer output `Y`. |
| 3298 | retain_grads : bool |
| 3299 | Whether to include the intermediate parameter gradients computed |
| 3300 | during the backward pass in the final parameter update. Default is |
| 3301 | True. |
| 3302 | |
| 3303 | Returns |
| 3304 | ------- |
| 3305 | dX : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, in_rows, in_cols, in_ch)` |
| 3306 | The gradient of the loss wrt. the layer input `X`. |
| 3307 | """ # noqa: E501 |
| 3308 | assert self.trainable, "Layer is frozen" |
| 3309 | if not isinstance(dLdY, list): |
| 3310 | dLdY = [dLdY] |
| 3311 | |
| 3312 | Xs = self.X |
| 3313 | out_rows = self.derived_variables["out_rows"] |
| 3314 | out_cols = self.derived_variables["out_cols"] |
| 3315 | |
| 3316 | (fr, fc), s, p = self.kernel_shape, self.stride, self.pad |
| 3317 | |
| 3318 | dXs = [] |
| 3319 | for X, dy, out_row, out_col in zip(Xs, dLdY, out_rows, out_cols): |
| 3320 | n_ex, in_rows, in_cols, nc_in = X.shape |
| 3321 | X_pad, (pr1, pr2, pc1, pc2) = pad2D(X, p, self.kernel_shape, s) |
| 3322 | |
| 3323 | dX = np.zeros_like(X_pad) |
| 3324 | for m in range(n_ex): |
| 3325 | for i in range(out_row): |
| 3326 | for j in range(out_col): |
| 3327 | for c in range(self.out_ch): |
| 3328 | # calculate window boundaries, incorporating stride |
| 3329 | i0, i1 = i * s, (i * s) + fr |
| 3330 | j0, j1 = j * s, (j * s) + fc |
| 3331 | |
| 3332 | if self.mode == "max": |
| 3333 | xi = X[m, i0:i1, j0:j1, c] |
| 3334 | |
| 3335 | # enforce that the mask can only consist of a |
| 3336 | # single `True` entry, even if multiple entries in |
| 3337 | # xi are equal to max(xi) |
| 3338 | mask = np.zeros_like(xi).astype(bool) |
| 3339 | x, y = np.argwhere(xi == np.max(xi))[0] |
| 3340 | mask[x, y] = True |
| 3341 | |
| 3342 | dX[m, i0:i1, j0:j1, c] += mask * dy[m, i, j, c] |
| 3343 | elif self.mode == "average": |
| 3344 | frame = np.ones((fr, fc)) * dy[m, i, j, c] |
| 3345 | dX[m, i0:i1, j0:j1, c] += frame / np.prod((fr, fc)) |
| 3346 | |
| 3347 | pr2 = None if pr2 == 0 else -pr2 |