# Source code for chainer.link

import collections
import copy
import warnings

import numpy
import six

from chainer import cuda
from chainer import initializers
import chainer.serializer
from chainer import variable


def _is_shape(value):
    if value is None:
        return True
    elif isinstance(value, collections.Sequence):
        try:
            return all(int(x) for x in value)
        except TypeError:
            return False
    try:
        return int(value)
    except TypeError:
        return False


def _ensure_shape_dtype(value):
    """Normalize a parameter specification into a ``(shape, dtype)`` pair.

    A bare shape is paired with the dtype code ``'f'`` (FP32). Anything else
    is assumed to already be a ``(shape, dtype)`` pair and is returned as is.

    """
    return (value, 'f') if _is_shape(value) else value


class Link(object):

    """Building block of model definitions.

    Link is a building block of neural network models that support various
    features like handling parameters, defining network fragments,
    serialization, etc.

    Link is the primitive structure for the model definitions. It supports
    management of parameter variables and *persistent values* that should be
    incorporated to serialization. Parameters are variables registered via
    the :meth:`add_param` method, or given to the initializer method.
    Persistent values are arrays, scalars, or any other serializable values
    registered via the :meth:`add_persistent` method.

    .. note::
       Whereas arbitrary serializable objects can be registered as persistent
       values, it is strongly recommended to just register values that should
       be treated as results of learning. A typical example of persistent
       values is ones computed during training and required for testing, e.g.
       running statistics for batch normalization.

    Parameters and persistent values are referred by their names. They can be
    accessed as attributes of the links. Link class itself manages the lists
    of names of parameters and persistent values to distinguish parameters and
    persistent values from other attributes.

    Link can be composed into more complex models. This composition feature is
    supported by child classes like :class:`Chain` and :class:`ChainList`. One
    can create a chain by combining one or more links. See the documents for
    these classes for details.

    As noted above, Link supports the serialization protocol of the
    :class:`~chainer.Serializer` class. **Note that only parameters and
    persistent values are saved and loaded.** Other attributes are considered
    as a part of user program (i.e. a part of network definition). In order to
    construct a link from saved file, other attributes must be identically
    reconstructed by user codes.

    .. admonition:: Example

       This is a simple example of custom link definition. Chainer itself also
       provides many links defined under the :mod:`~chainer.links` module. They
       might serve as examples, too.

       Consider we want to define a simple primitive link that implements a
       fully-connected layer based on the :func:`~functions.linear` function.
       Note that this function takes input units, a weight variable, and a bias
       variable as arguments. Then, the fully-connected layer can be defined as
       follows::

          import chainer
          import chainer.functions as F
          import numpy as np

          class LinearLayer(chainer.Link):

              def __init__(self, n_in, n_out):
                  # Parameters are initialized as a numpy array of given shape.
                  super(LinearLayer, self).__init__(
                      W=(n_out, n_in),
                      b=(n_out,),
                  )
                  self.W.data[...] = np.random.randn(n_out, n_in)
                  self.b.data.fill(0)

              def __call__(self, x):
                  return F.linear(x, self.W, self.b)

       This example shows that a user can define arbitrary parameters and use
       them in any methods. Links typically implement the ``__call__``
       operator.

    Args:
        params: Names, shapes, and optional dtypes of initial parameters. The
            keywords are used as the parameter names and the corresponding
            values consist either of the shape or a tuple of shape and a dtype
            `(shape, dtype)`. If only the shape is supplied, the default dtype
            will be used.

    Attributes:
        name (str): Name of this link, given by the parent chain (if exists).

    """

    def __init__(self, **params):
        # Names of the registered parameters and persistent values; the
        # objects themselves are stored as instance attributes.
        self._params = []
        self._persistent = []
        # Maps the name of each not-yet-initialized parameter to a placeholder
        # recording whether its gradient was cleared/zeroed in the meantime.
        self._uninitialized_params = {}
        self._cpu = True
        self._device_id = None
        self.name = None

        for name, value in six.iteritems(params):
            shape, dtype = _ensure_shape_dtype(value)
            self.add_param(name, shape, dtype=dtype)

    @property
    def xp(self):
        """Array module for this link.

        Depending on which of CPU/GPU this link is on, this property returns
        :mod:`numpy` or :mod:`cupy`.

        """
        return numpy if self._cpu else cuda.cupy

    def add_param(self, name, shape, dtype=numpy.float32, initializer=None):
        """Registers a parameter to the link.

        The registered parameter is saved and loaded on serialization and
        deserialization, and involved in the optimization. The data of the
        variable is initialized by a NaN array. If ``initializer`` is not
        ``None``, the data is initialized by ``initializer``.

        The gradient is initialized by a NaN array as well, unless ``name``
        was registered as an uninitialized parameter and :meth:`cleargrads`
        or :meth:`zerograds` has been called since then; in these cases the
        gradient is set to ``None`` or to a zero array, respectively.

        If the supplied ``name`` argument corresponds to an uninitialized
        parameter (that is, one that was added with the
        :meth:`add_uninitialized_param` method), ``name`` will be removed
        from the set of uninitialized parameters.

        The parameter is set to an attribute of the link with the given name.

        Args:
            name (str): Name of the parameter. This name is also used as the
                attribute name. Any uninitialized parameters with the same
                name will be removed.
            shape (int or tuple of ints): Shape of the parameter array.
            dtype: Data type of the parameter array.
            initializer(chainer.initializer.Initializer): If it is not
                ``None``, the data is initialized with the given initializer.
                Note that in this case ``dtype`` argument is ignored.

        Raises:
            AttributeError: If the link already has an attribute ``name``.

        """
        d = self.__dict__
        if name in d:
            raise AttributeError(
                'cannot register a new parameter %s: attribute exists'
                % name)
        xp = self.xp
        if initializer is None:
            data = xp.full(shape, numpy.nan, dtype=dtype)
        else:
            data = initializers.generate_array(initializer, shape, xp)

        # Honor a gradient reset that was requested while the parameter was
        # still uninitialized.
        u = self._uninitialized_params.get(name)
        if u is not None and u._cleared:
            grad = None
        elif u is not None and u._zeroed:
            grad = xp.zeros_like(data)
        else:
            grad = xp.full_like(data, numpy.nan)

        var = variable.Variable(data, volatile='auto', name=name)
        var.grad = grad
        self._params.append(name)
        d[name] = var
        # The parameter is initialized now, so drop its placeholder (if any).
        self._uninitialized_params.pop(name, None)

    def add_uninitialized_param(self, name):
        """Registers an uninitialized parameter to the link.

        An uninitialized parameter is defined as a parameter that has a name
        but that does not yet have a shape. If the shape of a parameter
        depends on the shape of the inputs to the ``__call__`` operator,
        it can be useful to defer initialization (that is, setting the shape)
        until the first forward call of the link. Such parameters are
        intended to be defined as uninitialized parameters in the initializer
        and then initialized during the first forward call.

        An uninitialized parameter is intended to be registered to a link by
        calling this method in the initializer method. Then, during the
        first forward call, the shape of the parameter will be determined
        from the size of the inputs and the parameter must be initialized by
        calling the :meth:`add_param` method.

        Args:
            name: (str): Name of the uninitialized parameter.

        Raises:
            AttributeError: If ``name`` is already registered as an
                uninitialized parameter or is an existing attribute.

        """
        class uninitialized_param(object):
            # Placeholder remembering gradient resets requested before the
            # parameter gets its shape; consumed by ``add_param``.

            def __init__(self):
                self._cleared = False
                self._zeroed = False

        d = self.__dict__
        if (name in self._uninitialized_params) or (name in d):
            raise AttributeError(
                'cannot register a new uninitialized parameter %s: exists'
                % name)
        self._uninitialized_params[name] = uninitialized_param()

    @property
    def has_uninitialized_params(self):
        """Check if the link has uninitialized parameters.

        Returns:
            bool: ``True`` if the link has any uninitialized parameters.
            Otherwise returns ``False``.

        """
        return bool(self._uninitialized_params)

    def add_persistent(self, name, value):
        """Registers a persistent value to the link.

        The registered value is saved and loaded on serialization and
        deserialization. The value is set to an attribute of the link.

        Args:
            name (str): Name of the persistent value. This name is also used
                for the attribute name.
            value: Value to be registered.

        Raises:
            AttributeError: If the link already has an attribute ``name``.

        """
        d = self.__dict__
        if name in d:
            raise AttributeError(
                'cannot register a new persistent value %s: attribute exists'
                % name)
        self._persistent.append(name)
        d[name] = value

    def copy(self):
        """Copies the link hierarchy to new one.

        The whole hierarchy rooted by this link is copied. The copy is
        basically shallow, except that the parameter variables are also
        shallowly copied. It means that the parameter variables of copied one
        are different from ones of original link. The gradient of every
        copied parameter variable is reset to ``None``.

        The bookkeeping of uninitialized parameters is duplicated as well, so
        that initializing a deferred parameter (or resetting gradients) on
        one of the two links does not alter the state of the other.

        The name of the link is reset on the copy, since the copied instance
        does not belong to the original parent chain (even if exists).

        Returns:
            Link: Copied link object.

        """
        ret = copy.copy(self)
        ret._params = list(self._params)
        ret._persistent = list(self._persistent)
        # ``copy.copy`` leaves both links pointing at the same dict and the
        # same placeholder objects; give the copy its own.
        ret._uninitialized_params = {
            name: copy.copy(placeholder)
            for name, placeholder in self._uninitialized_params.items()}
        ret.name = None
        d = ret.__dict__
        for name in ret._params:
            d[name] = copy.copy(d[name])
            d[name].grad = None
        return ret

    def to_cpu(self):
        """Copies parameter variables and persistent values to CPU.

        This method does not handle non-registered attributes. If some of such
        attributes must be copied to CPU, the link implementation must
        override this method to do so.

        Returns: self

        """
        if self._cpu:
            return self
        d = self.__dict__
        for name in self._params:
            d[name].to_cpu()
        for name in self._persistent:
            value = d[name]
            # Persistent values that are not GPU arrays are left untouched.
            if isinstance(value, cuda.ndarray):
                d[name] = value.get()
        self._cpu = True
        self._device_id = None
        return self

    def to_gpu(self, device=None):
        """Copies parameter variables and persistent values to GPU.

        This method does not handle non-registered attributes. If some of such
        attributes must be copied to GPU, the link implementation must
        override this method to do so.

        Args:
            device: Target device specifier. If omitted, the current device is
                used.

        Returns: self

        """
        cuda.check_cuda_available()
        if not self._cpu:
            return self
        d = self.__dict__
        with cuda._get_device(device):
            for name in self._params:
                d[name].to_gpu()
            for name in self._persistent:
                value = d[name]
                # Persistent values that are not NumPy arrays are left
                # untouched.
                if isinstance(value, numpy.ndarray):
                    d[name] = cuda.to_gpu(value)
            self._device_id = cuda.cupy.cuda.get_device_id()
        self._cpu = False
        return self

    def params(self):
        """Returns a generator of all parameters under the link hierarchy.

        Returns:
            A generator object that generates all parameters.

        """
        d = self.__dict__
        for name in self._params:
            yield d[name]

    def namedparams(self):
        """Returns a generator of all (path, param) pairs under the hierarchy.

        Returns:
            A generator object that generates all (path, parameter) pairs. The
            paths are relative from this link.

        """
        d = self.__dict__
        for name in self._params:
            yield '/' + name, d[name]

    def links(self, skipself=False):
        """Returns a generator of all links under the hierarchy.

        Args:
            skipself (bool): If ``True``, then the generator skips this link
                and starts with the first child link.

        Returns:
            A generator object that generates all links.

        """
        if not skipself:
            yield self

    def namedlinks(self, skipself=False):
        """Returns a generator of all (path, link) pairs under the hierarchy.

        Args:
            skipself (bool): If ``True``, then the generator skips this link
                and starts with the first child link.

        Returns:
            A generator object that generates all (path, link) pairs.

        """
        if not skipself:
            yield '/', self

    def children(self):
        """Returns a generator of all child links.

        Returns:
            A generator object that generates all child links.

        """
        # The unreachable ``yield`` turns this function into a generator that
        # produces nothing: a plain link has no children.
        if 0:
            yield

    def copyparams(self, link):
        """Copies all parameters from given link.

        This method copies data arrays of all parameters in the hierarchy. The
        copy is even done across the host and devices. Note that this method
        does not copy the gradient arrays.

        Uninitialized parameters of this link that exist as attributes of
        ``link`` are initialized with the shape and dtype of the source
        parameter before the data is copied.

        Args:
            link (Link): Source link object.

        """
        src = link.__dict__
        dst = self.__dict__
        for name in self._params:
            dst[name].copydata(src[name])
        # tuple() here is needed to avoid conflicts with add_param
        for name in tuple(self._uninitialized_params):
            if name in src:
                src_param = src[name]
                self.add_param(name, src_param.shape, src_param.dtype)
                dst[name].copydata(src_param)

    def cleargrads(self):
        """Clears all gradient arrays.

        This method should be called before the backward computation at every
        iteration of the optimization.

        """
        for param in self.params():
            param.cleargrad()
        for link in self.links():
            for param in link._uninitialized_params.values():
                # Remember the request for ``add_param``; the most recent
                # reset request supersedes an earlier ``zerograds``.
                param._cleared = True
                param._zeroed = False

    def zerograds(self):
        """Initializes all gradient arrays by zero.

        This method can be used for the same purpose of cleargrads, but less
        efficient. This method is left for backward compatibility.

        .. deprecated:: v1.15
           Use :meth:`cleargrads` instead.

        """
        # stacklevel=2 attributes the warning to the caller of zerograds.
        warnings.warn(
            'Link.zerograds is deprecated. Use Link.cleargrads instead.',
            DeprecationWarning, stacklevel=2)
        for param in self.params():
            param.zerograd()
        for link in self.links():
            for param in link._uninitialized_params.values():
                # ``add_param`` checks ``_cleared`` first, so it has to be
                # reset for this later request to take effect.
                param._zeroed = True
                param._cleared = False

    def addgrads(self, link):
        """Accumulates gradient values from given link.

        This method adds each gradient array of the given link to corresponding
        gradient array of this link. The accumulation is even done across
        host and different devices.

        Args:
            link (Link): Source link object.

        """
        src = link.__dict__
        dst = self.__dict__
        for name in self._params:
            dst[name].addgrad(src[name])

    def serialize(self, serializer):
        """Serializes the link object.

        Args:
            serializer (~chainer.AbstractSerializer): Serializer object.

        Raises:
            ValueError: If the link has uninitialized parameters and
                ``serializer`` is a :class:`chainer.serializer.Serializer`.

        """
        # Reject the request up front so that nothing is handed to the
        # serializer when the link cannot be saved.
        if (self.has_uninitialized_params and
                isinstance(serializer, chainer.serializer.Serializer)):
            raise ValueError("uninitialized parameters cannot be serialized")
        d = self.__dict__
        for name in self._params:
            serializer(name, d[name].data)
        for name in self._persistent:
            d[name] = serializer(name, d[name])
        # Iterate over a snapshot because add_param removes entries.
        for name in tuple(self._uninitialized_params):
            # Note: There should only be uninitialized parameters
            # during deserialization.
            initialized_value = serializer(name, None)
            # The parameter is created with add_param's default dtype.
            self.add_param(name, initialized_value.shape)
            uninitialized_value = d[name].data
            if isinstance(uninitialized_value, numpy.ndarray):
                numpy.copyto(uninitialized_value, initialized_value)
            elif isinstance(uninitialized_value, cuda.ndarray):
                uninitialized_value.set(numpy.asarray(initialized_value))


class Chain(Link):

    """Composable link with object-like interface.

    Composability is one of the most important features of neural nets. Neural
    net models consist of many reusable fragments, and each model itself might
    be embedded into a larger learnable system. Chain enables us to write a
    neural net based on composition, without bothering about routine works like
    collecting parameters, serialization, copying the structure with parameters
    shared, etc.

    This class actually provides a way to compose one or more links into one
    structure. A chain can contain one or more *child links*. Child link is a
    link registered to the chain with its own name. The child link is stored to
    an attribute of the chain with the name. User can write a whole model or a
    fragment of neural nets as a child class of Chain.

    Each chain itself is also a link. Therefore, one can combine chains into
    higher-level chains. In this way, links and chains construct a *link
    hierarchy*. Link hierarchy forms a tree structure, where each node is
    identified by the path from the root. The path is represented by a string
    like a file path in UNIX, consisting of names of nodes on the path, joined
    by slashes ``/``.

    .. admonition:: Example

       This is a simple example of custom chain definition. Chainer itself also
       provides some chains defined under the :mod:`~chainer.links` module.
       They might serve as examples, too.

       Consider we want to define a multi-layer perceptron consisting of two
       hidden layers with rectifiers as activation functions. We can use the
       :class:`~chainer.links.Linear` link as a building block::

          import chainer
          import chainer.functions as F
          import chainer.links as L

          class MultiLayerPerceptron(chainer.Chain):

              def __init__(self, n_in, n_hidden, n_out):
                  # Create and register three layers for this MLP
                  super(MultiLayerPerceptron, self).__init__(
                      layer1=L.Linear(n_in, n_hidden),
                      layer2=L.Linear(n_hidden, n_hidden),
                      layer3=L.Linear(n_hidden, n_out),
                  )

              def __call__(self, x):
                  # Forward propagation
                  h1 = F.relu(self.layer1(x))
                  h2 = F.relu(self.layer2(h1))
                  return self.layer3(h2)

       Child links are registered via the initializer method. They also can be
       registered by the :meth:`add_link` method. The forward propagation is
       often implemented as the ``__call__`` operator as the above example,
       though it is not mandatory.

    Args:
        links: Child links. The keywords are used as their names. The names are
            also set to the links.

    """

    def __init__(self, **links):
        super(Chain, self).__init__()
        # Names of the child links; the links themselves are stored as
        # instance attributes.
        self._children = []

        for name, link in six.iteritems(links):
            self.add_link(name, link)

    def __getitem__(self, name):
        """Equivalent to getattr."""
        return getattr(self, name)

    def add_link(self, name, link):
        """Registers a child link to this chain.

        The registered link is saved and loaded on serialization and
        deserialization, and involved in the optimization. The registered link
        is called a child. The child link is set to an attribute of the chain
        with the given name.

        This method also sets the :attr:`~Link.name` attribute of the
        registered link. If the given link already has the name attribute set,
        then it raises an error.

        Args:
            name (str): Name of the child link. This name is also used as the
                attribute name.
            link (Link): The link object to be registered.

        Raises:
            ValueError: If ``link`` already has a name, i.e. it is registered
                to a chain.
            AttributeError: If the chain already has an attribute ``name``.

        """
        if link.name is not None:
            raise ValueError(
                'given link is already registered to another chain by name %s'
                % link.name)
        d = self.__dict__
        if name in d:
            raise AttributeError(
                'cannot register a new link %s: attribute exists' % name)
        self._children.append(name)
        link.name = name
        d[name] = link

    def copy(self):
        # Copy this chain first, then replace every child by its own copy.
        ret = super(Chain, self).copy()
        ret._children = list(ret._children)
        d = ret.__dict__
        for name in ret._children:
            # copy child links recursively
            copied = d[name].copy()
            # Link.copy resets the name; restore it for the copied child.
            copied.name = name
            d[name] = copied
        return ret

    def to_cpu(self):
        # Move this chain's own values, then every child link.
        super(Chain, self).to_cpu()
        d = self.__dict__
        for name in self._children:
            d[name].to_cpu()
        return self

    def to_gpu(self, device=None):
        # The device context is entered once here; the nested to_gpu calls
        # are made without a device argument inside that context.
        with cuda._get_device(device):
            super(Chain, self).to_gpu()
            d = self.__dict__
            for name in self._children:
                d[name].to_gpu()
        return self

    def params(self):
        # Own parameters first, then those of the children in registration
        # order.
        for param in super(Chain, self).params():
            yield param
        d = self.__dict__
        for name in self._children:
            for param in d[name].params():
                yield param

    def namedparams(self):
        # Paths of child parameters are prefixed with '/<child name>'.
        for ret in super(Chain, self).namedparams():
            yield ret
        d = self.__dict__
        for name in self._children:
            prefix = '/' + name
            for path, param in d[name].namedparams():
                yield prefix + path, param

    def links(self, skipself=False):
        # ``skipself`` only applies to this chain; children are always
        # included together with their own descendants.
        if not skipself:
            yield self
        d = self.__dict__
        for name in self._children:
            for link in d[name].links():
                yield link

    def namedlinks(self, skipself=False):
        if not skipself:
            yield '/', self
        d = self.__dict__
        for name in self._children:
            child = d[name]
            prefix = '/' + name
            yield prefix, child
            # The child itself was yielded above under its prefixed path, so
            # ask it to skip itself and only report its descendants.
            for path, link in child.namedlinks(True):
                yield prefix + path, link

    def children(self):
        d = self.__dict__
        for name in self._children:
            yield d[name]

    def copyparams(self, link):
        # Children are matched by attribute name between the two chains.
        super(Chain, self).copyparams(link)
        src = link.__dict__
        dst = self.__dict__
        for name in self._children:
            dst[name].copyparams(src[name])

    def addgrads(self, link):
        # Children are matched by attribute name between the two chains.
        super(Chain, self).addgrads(link)
        src = link.__dict__
        dst = self.__dict__
        for name in self._children:
            dst[name].addgrads(src[name])

    def serialize(self, serializer):
        # Each child is serialized through the sub-serializer obtained by
        # indexing with the child's name.
        super(Chain, self).serialize(serializer)
        d = self.__dict__
        for name in self._children:
            d[name].serialize(serializer[name])


class ChainList(Link):

    """Composable link with list-like interface.

    This is another example of compositional link. Unlike :class:`Chain`, this
    class can be used like a list of child links. Each child link is indexed by
    a non-negative integer, and it maintains the current number of registered
    child links. The :meth:`add_link` method inserts a new link at the end of
    the list. It is useful to write a chain with arbitrary number of child
    links, e.g. an arbitrarily deep multi-layer perceptron.

    Note that this class does not implement all methods of :class:`list`.

    Args:
        links: Initial child links.

    """

    def __init__(self, *links):
        super(ChainList, self).__init__()
        # Child links in registration order.
        self._children = []

        for link in links:
            self.add_link(link)

    def __getitem__(self, index):
        """Returns the child at given index.

        Args:
            index (int): Index of the child in the list.

        Returns:
            Link: The ``index``-th child link.

        """
        return self._children[index]

    def __iter__(self):
        return iter(self._children)

    def __len__(self):
        """Returns a number of children."""
        return len(self._children)

    def add_link(self, link):
        """Registers a child link to this chain.

        The registered link is saved and loaded on serialization and
        deserialization, and involved in the optimization. The registered link
        is called a child. The child link is accessible via :meth:`children`
        generator, which returns a generator running through the children in
        registered order.

        This method also sets the :attr:`~Link.name` attribute of the
        registered link. If the given link already has the name attribute set,
        then it raises an error.

        Args:
            link (Link): The link object to be registered.

        Raises:
            ValueError: If ``link`` already has a name, i.e. it is registered
                to a chain.

        """
        if link.name is not None:
            raise ValueError(
                'given link is already registered to another chain by name %s'
                % link.name)
        # The name of a child is its position in the list, as a string.
        link.name = str(len(self._children))
        self._children.append(link)

    def copy(self):
        # Copy this link first, then replace every child by its own copy.
        ret = super(ChainList, self).copy()
        ret._children = list(ret._children)  # copy
        children = ret._children
        for i, child in enumerate(children):
            child = child.copy()
            # Link.copy resets the name; restore the positional name.
            child.name = str(i)
            children[i] = child
        return ret

    def to_cpu(self):
        # Move this link's own values, then every child link.
        super(ChainList, self).to_cpu()
        for link in self._children:
            link.to_cpu()
        return self

    def to_gpu(self, device=None):
        # The device context is entered once here; the nested to_gpu calls
        # are made without a device argument inside that context.
        with cuda._get_device(device):
            super(ChainList, self).to_gpu()
            for link in self._children:
                link.to_gpu()
        return self

    def params(self):
        # Own parameters first, then those of the children in list order.
        for param in super(ChainList, self).params():
            yield param
        for link in self._children:
            for param in link.params():
                yield param

    def namedparams(self):
        # Paths of child parameters are prefixed with '/<child index>'.
        for ret in super(ChainList, self).namedparams():
            yield ret
        for idx, link in enumerate(self._children):
            prefix = '/%d' % idx
            for path, param in link.namedparams():
                yield prefix + path, param

    def links(self, skipself=False):
        # ``skipself`` only applies to this link; children are always
        # included together with their own descendants.
        if not skipself:
            yield self
        for child in self._children:
            for link in child.links():
                yield link

    def namedlinks(self, skipself=False):
        if not skipself:
            yield '/', self
        for idx, child in enumerate(self._children):
            prefix = '/%d' % idx
            yield prefix, child
            # The child itself was yielded above under its prefixed path, so
            # ask it to skip itself and only report its descendants.
            for path, link in child.namedlinks(True):
                yield prefix + path, link

    def children(self):
        for child in self._children:
            yield child

    def copyparams(self, link):
        # Children are matched by position between the two lists.
        super(ChainList, self).copyparams(link)
        for idx, child in enumerate(self._children):
            child.copyparams(link[idx])

    def addgrads(self, link):
        # Children are matched by position between the two lists.
        super(ChainList, self).addgrads(link)
        for idx, child in enumerate(self._children):
            child.addgrads(link[idx])

    def serialize(self, serializer):
        # Each child is serialized through the sub-serializer obtained by
        # indexing with the child's position as a string.
        super(ChainList, self).serialize(serializer)
        for idx, child in enumerate(self._children):
            child.serialize(serializer['%d' % idx])