| 3574 | |
| 3575 | |
| 3576 | class RNNCell(LayerBase): |
def __init__(self, n_out, act_fn="Tanh", init="glorot_uniform", optimizer=None):
    r"""
    Single-timestep cell of a vanilla (Elman) recurrent network.

    Notes
    -----
    For timestep `t`, the cell maps the current input and the previous
    hidden state to a new hidden state via

    .. math::

        \mathbf{Z}^{(t)}  &=
            \mathbf{W}_{ax} \mathbf{X}^{(t)} + \mathbf{b}_{ax} +
                \mathbf{W}_{aa} \mathbf{A}^{(t-1)} + \mathbf{b}_{aa} \\
        \mathbf{A}^{(t)}  &=  f(\mathbf{Z}^{(t)})

    with the following notation:

    - :math:`\mathbf{X}^{(t)}` -- the input at timestep `t`
    - :math:`\mathbf{A}^{(t)}` -- the hidden state at timestep `t`
    - `f` -- the activation function of the layer
    - :math:`\mathbf{W}_{ax}`, :math:`\mathbf{b}_{ax}` -- weights and bias
      of the input-to-hidden transformation
    - :math:`\mathbf{W}_{aa}`, :math:`\mathbf{b}_{aa}` -- weights and bias
      of the hidden-to-hidden transformation

    Parameters
    ----------
    n_out : int
        Size of one hidden state (equivalently, one output) at a single
        timestep.
    act_fn : str, :doc:`Activation <numpy_ml.neural_nets.activations>` object, or None
        Activation used to compute ``A[t]``. Default is `'Tanh'`.
    init : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
        Strategy used to initialize the weights. Default is
        `'glorot_uniform'`.
    optimizer : str, :doc:`Optimizer <numpy_ml.neural_nets.optimizers>` object, or None
        Optimization strategy applied by the :meth:`update` method when
        performing gradient updates. If None, the :class:`SGD
        <numpy_ml.neural_nets.optimizers.SGD>` optimizer with default
        parameters is used. Default is None.
    """  # noqa: E501
    super().__init__(optimizer)

    # Configuration supplied by the caller; `n_in` and `n_timesteps` are
    # not known at construction time and start out as None.
    self.init = init
    self.n_in = None
    self.n_out = n_out
    self.n_timesteps = None

    # Resolve the activation spec (name / object / None) into a callable.
    build_activation = ActivationInitializer(act_fn)
    self.act_fn = build_activation()

    # Placeholder slots for the trainable parameters; every entry is None
    # until the layer is initialized.
    # NOTE(review): "ba"/"bx" presumably correspond to b_aa/b_ax in the
    # docstring -- confirm against the forward pass.
    self.parameters = dict.fromkeys(("Waa", "Wax", "ba", "bx"))
    self.is_initialized = False
| 3625 | |
| 3626 | def _init_params(self): |
| 3627 | self.X = [] |
| 3628 | init_weights = WeightInitializer(str(self.act_fn), mode=self.init) |
| 3629 | |
| 3630 | Wax = init_weights((self.n_in, self.n_out)) |
| 3631 | Waa = init_weights((self.n_out, self.n_out)) |
| 3632 | ba = np.zeros((self.n_out, 1)) |
| 3633 | bx = np.zeros((self.n_out, 1)) |
no outgoing calls