Source code for chainer.links.connection.gru

import numpy

import chainer
from chainer.functions.activation import sigmoid
from chainer.functions.activation import tanh
from chainer.functions.math import linear_interpolate
from chainer import link
from chainer.links.connection import linear


class GRUBase(link.Chain):

    def __init__(self, n_units, n_inputs=None, init=None,
                 inner_init=None, bias_init=0):
        if n_inputs is None:
            n_inputs = n_units
        super(GRUBase, self).__init__(
            W_r=linear.Linear(n_inputs, n_units,
                              initialW=init, initial_bias=bias_init),
            U_r=linear.Linear(n_units, n_units,
                              initialW=inner_init, initial_bias=bias_init),
            W_z=linear.Linear(n_inputs, n_units,
                              initialW=init, initial_bias=bias_init),
            U_z=linear.Linear(n_units, n_units,
                              initialW=inner_init, initial_bias=bias_init),
            W=linear.Linear(n_inputs, n_units,
                            initialW=init, initial_bias=bias_init),
            U=linear.Linear(n_units, n_units,
                            initialW=inner_init, initial_bias=bias_init),
        )
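
# A minimal shape-check sketch (illustrative; not part of the original
# module). GRUBase bundles six Linear links: W_r, W_z, W map the input
# space to the hidden space, while U_r, U_z, U are square recurrent
# matrices. This assumes Chainer's Linear stores its weight as ``W``
# with shape ``(out_size, in_size)``; the sizes below are arbitrary.
base = GRUBase(n_units=3, n_inputs=5)
assert base.W_r.W.data.shape == (3, 5)  # input-to-hidden
assert base.U_r.W.data.shape == (3, 3)  # hidden-to-hidden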


class GRU(GRUBase):

    """Stateless Gated Recurrent Unit function (GRU).

    The GRU function has six parameters :math:`W_r`, :math:`W_z`,
    :math:`W`, :math:`U_r`, :math:`U_z`, and :math:`U`. All these
    parameters are :math:`n \\times n` matrices, where :math:`n` is the
    dimension of hidden vectors.

    Given two inputs, a previous hidden vector :math:`h` and an input
    vector :math:`x`, GRU returns the next hidden vector :math:`h'`
    defined as

    .. math::

       r &=& \\sigma(W_r x + U_r h), \\\\
       z &=& \\sigma(W_z x + U_z h), \\\\
       \\bar{h} &=& \\tanh(W x + U (r \\odot h)), \\\\
       h' &=& (1 - z) \\odot h + z \\odot \\bar{h},

    where :math:`\\sigma` is the sigmoid function and :math:`\\odot` is
    the element-wise product.

    :class:`~chainer.links.GRU` does not hold the value of the hidden
    vector :math:`h`, so it is *stateless*.
    Use :class:`~chainer.links.StatefulGRU` as a *stateful* GRU.

    Args:
        n_units (int): Dimension of hidden vector :math:`h`.
        n_inputs (int): Dimension of input vector :math:`x`. If ``None``,
            it is set to the same value as ``n_units``.

    See:
        - `On the Properties of Neural Machine Translation: Encoder-Decoder
          Approaches <http://www.aclweb.org/anthology/W14-4012>`_
          [Cho+, SSST2014].
        - `Empirical Evaluation of Gated Recurrent Neural Networks on
          Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
          [Chung+, NIPS2014 DL Workshop].

    .. seealso:: :class:`~chainer.links.StatefulGRU`

    """

    def __call__(self, h, x):
        r = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
        z = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
        h_bar = tanh.tanh(self.W(x) + self.U(r * h))
        # h' = (1 - z) * h + z * h_bar
        h_new = linear_interpolate.linear_interpolate(z, h_bar, h)
        return h_new
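
# A minimal usage sketch (illustrative; not part of the original module).
# The stateless GRU takes both the previous hidden vector ``h`` and the
# input ``x`` on every call, so the caller carries the state between
# time steps. Sizes here are arbitrary.
gru = GRU(3)  # n_units == n_inputs == 3
h = chainer.Variable(numpy.zeros((1, 3), dtype=numpy.float32))
x = chainer.Variable(numpy.ones((1, 3), dtype=numpy.float32))
h = gru(h, x)  # next hidden vector; h.data.shape == (1, 3)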
class StatefulGRU(GRUBase):

    """Stateful Gated Recurrent Unit function (GRU).

    The stateful GRU function has six parameters :math:`W_r`, :math:`W_z`,
    :math:`W`, :math:`U_r`, :math:`U_z`, and :math:`U`. All these
    parameters are :math:`n \\times n` matrices, where :math:`n` is the
    dimension of hidden vectors.

    Given an input vector :math:`x`, the stateful GRU returns the next
    hidden vector :math:`h'` defined as

    .. math::

       r &=& \\sigma(W_r x + U_r h), \\\\
       z &=& \\sigma(W_z x + U_z h), \\\\
       \\bar{h} &=& \\tanh(W x + U (r \\odot h)), \\\\
       h' &=& (1 - z) \\odot h + z \\odot \\bar{h},

    where :math:`h` is the current hidden vector.

    As the name indicates, :class:`~chainer.links.StatefulGRU` is
    *stateful*, meaning that it also holds the next hidden vector
    :math:`h'` as a state. Use :class:`~chainer.links.GRU` as a
    stateless version of GRU.

    Args:
        in_size (int): Dimension of input vector :math:`x`.
        out_size (int): Dimension of hidden vector :math:`h`.
        init: A callable that takes ``numpy.ndarray`` or ``cupy.ndarray``
            and edits its value. It is used to initialize the GRU's input
            units (:math:`W`). May be ``None`` to use the default
            initialization.
        inner_init: A callable that takes ``numpy.ndarray`` or
            ``cupy.ndarray`` and edits its value. It is used to initialize
            the GRU's inner recurrent units (:math:`U`). May be ``None``
            to use the default initialization.
        bias_init: A callable or scalar used to initialize the bias values
            for both the GRU's inner and input units. May be ``None`` to
            use the default initialization.

    Attributes:
        h (~chainer.Variable): Hidden vector that indicates the state of
            :class:`~chainer.links.StatefulGRU`.

    .. seealso:: :class:`~chainer.links.GRU`

    """

    def __init__(self, in_size, out_size, init=None,
                 inner_init=None, bias_init=0):
        super(StatefulGRU, self).__init__(
            out_size, in_size, init, inner_init, bias_init)
        self.state_size = out_size
        self.reset_state()

    def to_cpu(self):
        super(StatefulGRU, self).to_cpu()
        if self.h is not None:
            self.h.to_cpu()

    def to_gpu(self, device=None):
        super(StatefulGRU, self).to_gpu(device)
        if self.h is not None:
            self.h.to_gpu(device)

    def set_state(self, h):
        assert isinstance(h, chainer.Variable)
        h_ = h
        if self.xp == numpy:
            h_.to_cpu()
        else:
            h_.to_gpu(self._device_id)
        self.h = h_

    def reset_state(self):
        self.h = None

    def __call__(self, x):
        z = self.W_z(x)
        h_bar = self.W(x)
        if self.h is not None:
            r = sigmoid.sigmoid(self.W_r(x) + self.U_r(self.h))
            z += self.U_z(self.h)
            h_bar += self.U(r * self.h)
        z = sigmoid.sigmoid(z)
        h_bar = tanh.tanh(h_bar)

        if self.h is not None:
            h_new = linear_interpolate.linear_interpolate(z, h_bar, self.h)
        else:
            h_new = z * h_bar
        self.h = h_new  # keep the state for the next call
        return self.h
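
# A minimal usage sketch (illustrative; not part of the original module).
# The stateful GRU keeps the hidden vector in ``self.h``, so a sequence
# is processed one time step per call; reset_state() starts a new
# sequence, and the first call after it skips the recurrent terms.
sgru = StatefulGRU(in_size=4, out_size=3)
sgru.reset_state()
for _ in range(5):
    x_t = chainer.Variable(numpy.ones((1, 4), dtype=numpy.float32))
    h_t = sgru(x_t)  # h_t is also stored as sgru.h for the next step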