Run a backward pass across all timesteps in the input. Parameters ---------- dLdA : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, n_out, n_t)` The gradient of the loss with respect to the layer output for each of the `n_ex` examples across all `n_t` timesteps.
(self, dLdA)
| 1090 | return np.dstack(Y) |
| 1091 | |
def backward(self, dLdA):
    """
    Run a backward pass across all timesteps in the input.

    Parameters
    ----------
    dLdA : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, n_out, n_t)`
        The gradient of the loss with respect to the layer output for each
        of the `n_ex` examples across all `n_t` timesteps. When
        `self.merge_mode` is ``"concat"`` the second axis is split at
        `self.n_out`: the leading `self.n_out` channels are routed to the
        forward cell and the remaining channels to the backward cell.

    Returns
    -------
    dLdX : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, n_in, n_t)`
        The gradient of the loss with respect to the layer input for each
        of the `n_ex` examples across each of the `n_t` timesteps, computed
        as the sum of the gradients returned by the forward and backward
        cells at each timestep.

    Raises
    ------
    AssertionError
        If the layer is frozen (`self.trainable` is falsy).
    ValueError
        If `self.merge_mode` is not one of ``"concat"``, ``"sum"``,
        ``"multiply"`` or ``"average"``.
    """
    assert self.trainable, "Layer is frozen"

    mode = self.merge_mode
    # Earlier revisions of this method compared against the misspelling
    # "multiplty", so a layer configured with "multiply" matched no branch
    # and crashed with an UnboundLocalError. Accept both spellings so that
    # any caller relying on the old one keeps working.
    is_product = mode in ("multiply", "multiplty")
    if not is_product and mode not in ("concat", "sum", "average"):
        raise ValueError("Unrecognized merge mode: {!r}".format(mode))

    # Unpacking (rather than indexing) keeps the requirement that dLdA is 3-D.
    _, _, n_t = dLdA.shape

    # forward LSTM
    # Timesteps are visited last-to-first; results are stored by index so
    # the list ends up in chronological order without repeated insert(0).
    dLdX_f = [None] * n_t
    for t in reversed(range(n_t)):
        if mode == "concat":
            grad = dLdA[:, : self.n_out, t]
        elif is_product:
            # Scale by the other direction's cached output (product rule,
            # assuming the forward pass merged as Y_fwd[t] * Y_bwd[t]).
            grad = dLdA[:, :, t] * self.Y_bwd[t]
        elif mode == "average":
            grad = dLdA[:, :, t] * 0.5
        else:  # "sum"
            grad = dLdA[:, :, t]
        dLdX_f[t] = self.cell_fwd.backward(grad)

    # backward LSTM
    # Timesteps are visited first-to-last, the opposite order to the
    # forward cell above.
    dLdX_b = []
    for t in range(n_t):
        if mode == "concat":
            grad = dLdA[:, self.n_out :, t]
        elif is_product:
            grad = dLdA[:, :, t] * self.Y_fwd[t]
        elif mode == "average":
            grad = dLdA[:, :, t] * 0.5
        else:  # "sum"
            grad = dLdA[:, :, t]
        dLdX_b.append(self.cell_bwd.backward(grad))

    # Both directions consumed the same input, so their input gradients add.
    dLdX = [g_f + g_b for g_f, g_b in zip(dLdX_f, dLdX_b)]

    return np.dstack(dLdX)
| 1141 | |
| 1142 | @property |
| 1143 | def derived_variables(self): |