| 3780 | |
| 3781 | |
| 3782 | class LSTMCell(LayerBase): |
| 3783 | def __init__( |
| 3784 | self, |
| 3785 | n_out, |
| 3786 | act_fn="Tanh", |
| 3787 | gate_fn="Sigmoid", |
| 3788 | init="glorot_uniform", |
| 3789 | optimizer=None, |
| 3790 | ): |
| 3791 | """ |
| 3792 | A single step of a long short-term memory (LSTM) RNN. |
| 3793 | |
| 3794 | Notes |
| 3795 | ----- |
| 3796 | Notation: |
| 3797 | |
| 3798 | - ``Z[t]`` is the input to each of the gates at timestep `t` |
| 3799 | - ``A[t]`` is the value of the hidden state at timestep `t` |
| 3800 | - ``Cc[t]`` is the value of the *candidate* cell/memory state at timestep `t` |
| 3801 | - ``C[t]`` is the value of the *final* cell/memory state at timestep `t` |
| 3802 | - ``Gf[t]`` is the output of the forget gate at timestep `t` |
| 3803 | - ``Gu[t]`` is the output of the update gate at timestep `t` |
| 3804 | - ``Go[t]`` is the output of the output gate at timestep `t` |
| 3805 | |
| 3806 | Equations:: |
| 3807 | |
| 3808 | Z[t] = stack([A[t-1], X[t]]) |
| 3809 | Gf[t] = gate_fn(Wf @ Z[t] + bf) |
| 3810 | Gu[t] = gate_fn(Wu @ Z[t] + bu) |
| 3811 | Go[t] = gate_fn(Wo @ Z[t] + bo) |
| 3812 | Cc[t] = act_fn(Wc @ Z[t] + bc) |
| 3813 | C[t] = Gf[t] * C[t-1] + Gu[t] * Cc[t] |
| 3814 | A[t] = Go[t] * act_fn(C[t]) |
| 3815 | |
| 3816 | where `@` indicates dot/matrix product, and `*` indicates elementwise |
| 3817 | multiplication. |
| 3818 | |
| 3819 | Parameters |
| 3820 | ---------- |
| 3821 | n_out : int |
| 3822 | The dimension of a single hidden state / output on a given timestep. |
| 3823 | act_fn : str, :doc:`Activation <numpy_ml.neural_nets.activations>` object, or None |
| 3824 | The activation function for computing ``Cc[t]`` and ``A[t]``. Default is |
| 3825 | `'Tanh'`. |
| 3826 | gate_fn : str, :doc:`Activation <numpy_ml.neural_nets.activations>` object, or None |
| 3827 | The gate function for computing the update, forget, and output |
| 3828 | gates. Default is `'Sigmoid'`. |
| 3829 | init : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} |
| 3830 | The weight initialization strategy. Default is `'glorot_uniform'`. |
| 3831 | optimizer : str, :doc:`Optimizer <numpy_ml.neural_nets.optimizers>` object, or None |
| 3832 | The optimization strategy to use when performing gradient updates |
| 3833 | within the :meth:`update` method. If None, use the :class:`SGD |
| 3834 | <numpy_ml.neural_nets.optimizers.SGD>` optimizer with default |
| 3835 | parameters. Default is None. |
| 3836 | """ # noqa: E501 |
| 3837 | super().__init__(optimizer) |
| 3838 | |
| 3839 | self.init = init |
no outgoing calls