Source code for chainer.links.connection.peephole

from chainer import cuda
from chainer.functions.activation import sigmoid
from chainer.functions.activation import tanh
from chainer.functions.array import reshape
from chainer.functions.array import split_axis
from chainer import link
from chainer.links.connection import linear
from chainer import variable


class StatefulPeepholeLSTM(link.Chain):

    """Fully-connected LSTM layer with peephole connections.

    This is a fully-connected LSTM layer with peephole connections as a chain.
    Unlike the :class:`~chainer.links.LSTM` link, this chain holds ``peep_i``,
    ``peep_f`` and ``peep_o`` as child links besides ``upward`` and
    ``lateral``.

    Given an input vector :math:`x`, Peephole returns the next hidden vector
    :math:`h'` defined as

    .. math::

       a &=& \\tanh(upward x + lateral h), \\\\
       i &=& \\sigma(upward x + lateral h + peep_i c), \\\\
       f &=& \\sigma(upward x + lateral h + peep_f c), \\\\
       c' &=& a \\odot i + f \\odot c, \\\\
       o &=& \\sigma(upward x + lateral h + peep_o c'), \\\\
       h' &=& o \\tanh(c'),

    where :math:`\\sigma` is the sigmoid function, :math:`\\odot` is the
    element-wise product, :math:`c` is the current cell state, :math:`c'`
    is the next cell state and :math:`h` is the current hidden vector.

    In the formulas above, ``upward`` and ``lateral`` each produce
    ``4 * out_size`` values; :math:`a`, :math:`i`, :math:`f` and :math:`o`
    each use their own ``out_size``-sized slice of that output.

    Args:
        in_size(int): Dimension of the input vector :math:`x`.
        out_size(int): Dimension of the hidden vector :math:`h`.

    Attributes:
        upward (~chainer.links.Linear): Linear layer of upward connections.
        lateral (~chainer.links.Linear): Linear layer of lateral connections.
        peep_i (~chainer.links.Linear): Linear layer of peephole connections
                                        to the input gate.
        peep_f (~chainer.links.Linear): Linear layer of peephole connections
                                        to the forget gate.
        peep_o (~chainer.links.Linear): Linear layer of peephole connections
                                        to the output gate.
        c (~chainer.Variable): Cell states of LSTM units.
        h (~chainer.Variable): Output at the current time step.

    """

    def __init__(self, in_size, out_size):
        """Builds the child links and clears the recurrent state.

        Args:
            in_size(int): Dimension of the input vector.
            out_size(int): Dimension of the hidden vector; also stored as
                ``state_size``.

        """
        super(StatefulPeepholeLSTM, self).__init__(
            # ``upward`` and ``lateral`` emit the pre-activations of all four
            # gates at once, hence the ``4 * out_size`` output width.
            upward=linear.Linear(in_size, 4 * out_size),
            lateral=linear.Linear(out_size, 4 * out_size, nobias=True),
            peep_i=linear.Linear(out_size, out_size, nobias=True),
            peep_f=linear.Linear(out_size, out_size, nobias=True),
            peep_o=linear.Linear(out_size, out_size, nobias=True),
        )
        self.state_size = out_size
        self.reset_state()

    def to_cpu(self):
        """Moves the link and, when set, the states ``c`` and ``h`` to CPU."""
        super(StatefulPeepholeLSTM, self).to_cpu()
        # The states are plain attributes rather than registered children,
        # so they are transferred explicitly here.
        if self.c is not None:
            self.c.to_cpu()
        if self.h is not None:
            self.h.to_cpu()

    def to_gpu(self, device=None):
        """Moves the link and, when set, the states ``c`` and ``h`` to GPU.

        Args:
            device: Device specifier forwarded unchanged to the parent
                ``to_gpu`` and to the ``to_gpu`` of each state variable.

        """
        super(StatefulPeepholeLSTM, self).to_gpu(device)
        # Same explicit transfer as in ``to_cpu``.
        if self.c is not None:
            self.c.to_gpu(device)
        if self.h is not None:
            self.h.to_gpu(device)

    def reset_state(self):
        """Resets the internal states.

        It sets ``None`` to the :attr:`c` and :attr:`h` attributes.

        """
        self.c = self.h = None

    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        When :attr:`h` is ``None`` the lateral term is skipped, and when
        :attr:`c` is ``None`` it is initialized to zeros of shape
        ``(x.shape[0], state_size)`` with the dtype of ``x``.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            # Allocate the initial cell state on the device of this link.
            with cuda.get_device_from_id(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((x.shape[0], self.state_size), dtype=x.dtype),
                    volatile='auto')

        # View the (batch, 4 * units) pre-activations as (batch, units, 4):
        # the four gate values of a unit sit next to each other, so
        # splitting along the last axis yields one (batch, units, 1)
        # variable per gate, which is then flattened to (batch, units).
        batch_size, width = lstm_in.shape[0], lstm_in.shape[1]
        lstm_in = reshape.reshape(lstm_in, (batch_size, width // 4, 4))
        a, i, f, o = [
            reshape.reshape(gate, (gate.shape[0], gate.shape[1]))
            for gate in split_axis.split_axis(lstm_in, 4, 2)
        ]

        # Input and forget gates peek at the *current* cell state.
        peep_in_i = self.peep_i(self.c)
        peep_in_f = self.peep_f(self.c)
        a = tanh.tanh(a)
        i = sigmoid.sigmoid(i + peep_in_i)
        f = sigmoid.sigmoid(f + peep_in_f)
        self.c = a * i + f * self.c

        # The output gate peeks at the *updated* cell state.
        peep_in_o = self.peep_o(self.c)
        o = sigmoid.sigmoid(o + peep_in_o)
        self.h = o * tanh.tanh(self.c)
        return self.h