Source code for chainer.function

import collections
import contextlib
import os
import threading
import traceback
import weakref

import six

import chainer
from chainer import cuda
from chainer import flag
from chainer.utils import type_check
from chainer import variable


_thread_local = threading.local()


@contextlib.contextmanager
def no_backprop_mode():
    """Disable back-propagation for Variable whose volatile is auto.

    In the default setting, a :class:`~chainer.Variable` object whose
    ``volatile`` attribute is ``'auto'`` behaves like a **non-volatile**
    variable. That means such a :class:`~chainer.Variable` object builds a
    computational graph, consumes memory to store the graph, and you can
    execute back-propagation for it. With this context such a
    :class:`~chainer.Variable` object behaves like a **volatile** variable.
    So, you can easily switch training and evaluation.

    In this example, the volatility of ``x`` and ``y`` is ``'auto'``, so
    ``y`` does not have a computational graph.

    >>> x = chainer.Variable(numpy.array([1,], 'f'), volatile='auto')
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1

    """
    default = getattr(_thread_local, 'default_backprop', True)
    _thread_local.default_backprop = False
    yield
    _thread_local.default_backprop = default
@contextlib.contextmanager
def force_backprop_mode():
    """Enable back-propagation for Variable whose volatile is auto.

    When you want to enable back-propagation in :func:`no_backprop_mode`,
    call this method. In this context, a :class:`~chainer.Variable` object
    whose ``volatile`` attribute is ``'auto'`` behaves like a **non-volatile**
    variable. That means you can disable :func:`no_backprop_mode` in this
    context.

    If you call this method outside of the :func:`no_backprop_mode` context,
    it changes nothing: a :class:`~chainer.Variable` object with
    ``volatile='auto'`` behaves like a non-volatile variable by default.

    In this example, the volatility of ``x`` and ``y`` is ``'auto'``. In the
    :func:`no_backprop_mode` context, ``y`` does not have a computational
    graph, but in :func:`force_backprop_mode` it has a graph.

    >>> with chainer.no_backprop_mode():
    ...     # Variable with volatile='auto' behaves like volatile='on'
    ...     with chainer.force_backprop_mode():
    ...         # Variable with volatile='auto' behaves like volatile='off'
    ...         y = x + 1

    .. seealso::

       See :func:`no_backprop_mode` for details of back-prop mode.

    """
    default = getattr(_thread_local, 'default_backprop', True)
    _thread_local.default_backprop = True
    yield
    _thread_local.default_backprop = default
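Both context managers only take effect for variables whose ``volatile`` flag is ``'auto'``; variables created with ``'on'`` or ``'off'`` are unaffected. The following is a minimal sketch of how the two modes nest; the array contents and variable names are illustrative, not part of this module:

    import numpy
    import chainer

    x = chainer.Variable(numpy.array([1.0], dtype=numpy.float32),
                         volatile='auto')

    with chainer.no_backprop_mode():
        y = x + 1                      # no graph is built: y.creator is None
        with chainer.force_backprop_mode():
            z = x + 1                  # graph is built again: z.creator is set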
class Function(object):

    """Function on variables with backpropagation ability.

    All function implementations defined in :mod:`chainer.functions` inherit
    this class.

    The main feature of this class is keeping track of function applications
    as a backward graph. When a function is applied to :class:`Variable`
    objects, its :meth:`forward` method is called on :data:`~Variable.data`
    fields of input variables, and at the same time it chains references from
    output variables to the function and from the function to its inputs.

    .. note::

       As of v1.5, a function instance cannot be used twice in any
       computational graph. In order to reuse a function object multiple
       times, use :func:`copy.copy` before the function applications to make
       a copy of the instance.

       This restriction also means that we cannot make a *stateful function*
       anymore. For example, it is now not allowed to let a function hold
       parameters. Define a function as a pure (stateless) procedure, and use
       :class:`~chainer.Link` to combine it with parameter variables.

    .. admonition:: Example

       Let ``x`` be an instance of :class:`Variable` and ``f`` an instance of
       :class:`Function` taking only one argument. Then the following code

       >>> import numpy, chainer, chainer.functions as F
       >>> x = chainer.Variable(numpy.zeros(10))
       >>> f = F.Identity()
       >>> y = f(x)

       computes a new variable ``y`` and creates backward references.
       Actually, backward references are set as per the following diagram::

           x <--- f <--- y

       If an application of another function ``g`` occurs as

       >>> g = F.Identity()
       >>> z = g(x)

       then the graph grows with a branch::

               |--- f <--- y
           x <-+
               |--- g <--- z

       Note that the branching is correctly managed on backward computation,
       i.e. the gradients from ``f`` and ``g`` are accumulated to the
       gradient of ``x``.

    Every function implementation should provide :meth:`forward_cpu`,
    :meth:`forward_gpu`, :meth:`backward_cpu` and :meth:`backward_gpu`.
    Alternatively, one can provide :meth:`forward` and :meth:`backward`
    instead of the separate methods. Backward methods have default
    implementations that just return ``None``, which indicates that the
    function is non-differentiable.

    Attributes:
        inputs: A tuple or list of input variables.
        outputs: A tuple or list of output variables.
        type_check_enable: When it is ``True``, the function checks types of
            input arguments. Set the ``CHAINER_TYPE_CHECK`` environment
            variable to ``0`` to disable type checking, or set the variable
            directly in your own program.

    """

    type_check_enable = int(os.environ.get('CHAINER_TYPE_CHECK', '1')) != 0
    def __call__(self, *inputs):
        """Applies forward propagation with chaining backward references.

        Basic behavior is expressed in the documentation of the
        :class:`Function` class.

        .. note::

           If the :data:`~Variable.data` attributes of the input variables
           reside on a GPU device, the appropriate device is selected before
           the :meth:`forward` method is called, so in most cases implementers
           do not need to take care of device selection.

        Args:
            inputs: Tuple of input :class:`Variable`, :class:`numpy.ndarray`
                or :class:`cupy.ndarray` objects. The volatile flags of all
                input variables must agree. If the input is a
                :class:`numpy.ndarray` or a :class:`cupy.ndarray`, it is
                automatically wrapped with :class:`Variable`.

        Returns:
            One :class:`Variable` object or a tuple of multiple
            :class:`Variable` objects.

        """
        inputs = [x if isinstance(x, chainer.Variable)
                  else chainer.Variable(x, volatile=flag.AUTO)
                  for x in inputs]

        in_data = tuple([x.data for x in inputs])
        if chainer.is_debug():
            self._stack = traceback.extract_stack()

        if self.type_check_enable:
            self._check_data_type_forward(in_data)

        hooks = chainer.get_function_hooks()
        if self._n_local_function_hooks != 0:
            hooks = collections.OrderedDict(hooks)
            hooks.update(self.local_function_hooks)
        for hook in six.itervalues(hooks):
            hook.forward_preprocess(self, in_data)

        # Forward prop
        with cuda.get_device_from_array(*in_data):
            outputs = self.forward(in_data)
            assert type(outputs) == tuple
        for hook in six.itervalues(hooks):
            hook.forward_postprocess(self, in_data)

        if chainer.is_debug():
            if any(out.dtype.kind == 'f' and
                   cuda.get_array_module(out).isnan(out).any()
                   for out in outputs):
                msg = 'NaN is detected on forward computation'
                raise RuntimeError(msg)

        out_v = flag.aggregate_flags([x.volatile for x in inputs])
        ret = tuple([variable.Variable(y, volatile=out_v) for y in outputs])

        if out_v == 'on':
            build_graph = False
        elif out_v == 'off':
            build_graph = True
        else:
            build_graph = getattr(_thread_local, 'default_backprop', True)

        if build_graph:
            # Topological ordering
            self.rank = max([x.rank for x in inputs]) if inputs else 0
            # Backward edges
            for y in ret:
                y.set_creator(self)
            self.inputs = inputs
            # Forward edges (must be weak references)
            self.outputs = tuple([weakref.ref(y) for y in ret])

        if len(ret) == 1:
            return ret[0]
        else:
            return ret
    @property
    def local_function_hooks(self):
        """Ordered dictionary of registered function hooks.

        Contrary to ``chainer.thread_local.function_hooks``, which registers
        its elements to all functions, function hooks in this property are
        specific to this function.

        """
        if not hasattr(self, '_local_function_hooks'):
            self._local_function_hooks = collections.OrderedDict()
        return self._local_function_hooks

    @property
    def _n_local_function_hooks(self):
        if hasattr(self, '_local_function_hooks'):
            return len(self._local_function_hooks)
        return 0

    @property
    def label(self):
        """Short text that represents the function.

        The default implementation returns its type name.
        Each function should override it to give more information.

        """
        return self.__class__.__name__

    @property
    def stack(self):
        if hasattr(self, '_stack'):
            return self._stack
        else:
            return None

    def _check_data_type_forward(self, in_data):
        in_type = type_check.get_types(in_data, 'in_types', False)
        with type_check.get_function_check_context(self):
            self.check_type_forward(in_type)
    def check_type_forward(self, in_types):
        """Checks types of input data before forward propagation.

        Before :meth:`forward` is called, this function is called.
        You need to validate types of input data in this function
        using :ref:`the type checking utilities <type-check-utils>`.

        Args:
            in_types (~chainer.utils.type_check.TypeInfoTuple): The type
                information of input data for :meth:`forward`.

        """
        pass
    def forward(self, inputs):
        """Applies forward propagation to input arrays.

        It delegates the procedure to :meth:`forward_cpu` or
        :meth:`forward_gpu` by default. Which it selects is determined by the
        type of input arrays. Implementations of :class:`Function` must
        implement either CPU/GPU methods or this method.

        Args:
            inputs: Tuple of input array(s).

        Returns:
            Tuple of output array(s).

        .. warning::

            Implementations of :class:`Function` must take care that the
            return value must be a tuple even if it returns only one array.

        """
        if any(isinstance(x, cuda.ndarray) for x in inputs):
            return self.forward_gpu(inputs)
        else:
            return self.forward_cpu(inputs)
    def forward_cpu(self, inputs):
        """Applies forward propagation to input arrays on CPU.

        Args:
            inputs: Tuple of :class:`numpy.ndarray` object(s).

        Returns:
            tuple: Tuple of :class:`numpy.ndarray` object(s).

        .. warning::

            Implementations of :class:`Function` must take care that the
            return value must be a tuple even if it returns only one array.

        """
        raise NotImplementedError()
    def forward_gpu(self, inputs):
        """Applies forward propagation to input arrays on GPU.

        Args:
            inputs: Tuple of :class:`cupy.ndarray` object(s).

        Returns:
            tuple: Tuple of :class:`cupy.ndarray` object(s).

        .. warning::

            Implementations of :class:`Function` must take care that the
            return value must be a tuple even if it returns only one array.

        """
        raise NotImplementedError()
    def backward(self, inputs, grad_outputs):
        """Applies backprop to output gradient arrays.

        It delegates the procedure to :meth:`backward_cpu` or
        :meth:`backward_gpu` by default. Which it selects is determined by the
        type of input arrays and output gradient arrays. Implementations of
        :class:`Function` must implement either CPU/GPU methods or this
        method, if the function is intended to be backprop-ed.

        Args:
            inputs: Tuple of input arrays.
            grad_outputs: Tuple of output gradient arrays.

        Returns:
            tuple: Tuple of input gradient arrays. Some or all of them can be
            ``None``, if the function is not differentiable on inputs.

        .. warning::

            Implementations of :class:`Function` must take care that the
            return value must be a tuple even if it returns only one array.

        """
        if any(isinstance(x, cuda.ndarray) for x in inputs + grad_outputs):
            return self.backward_gpu(inputs, grad_outputs)
        else:
            return self.backward_cpu(inputs, grad_outputs)
    def backward_cpu(self, inputs, grad_outputs):
        """Applies backprop to output gradient arrays on CPU.

        Args:
            inputs: Tuple of input :class:`numpy.ndarray` object(s).
            grad_outputs: Tuple of output gradient :class:`numpy.ndarray`
                object(s).

        Returns:
            tuple: Tuple of input gradient :class:`numpy.ndarray` object(s).
            Some or all of them can be ``None``, if the function is not
            differentiable on corresponding inputs.

        .. warning::

            Implementations of :class:`Function` must take care that the
            return value must be a tuple even if it returns only one array.

        """
        return tuple(None for _ in inputs)
    def backward_gpu(self, inputs, grad_outputs):
        """Applies backprop to output gradient arrays on GPU.

        Args:
            inputs: Tuple of input :class:`cupy.ndarray` object(s).
            grad_outputs: Tuple of output gradient :class:`cupy.ndarray`
                object(s).

        Returns:
            tuple: Tuple of input gradient :class:`cupy.ndarray` object(s).
            Some or all of them can be ``None``, if the function is not
            differentiable on corresponding inputs.

        .. warning::

            Implementations of :class:`Function` must take care that the
            return value must be a tuple even if it returns only one array.

        """
        return tuple(None for _ in inputs)
    def unchain(self):
        """Purges in/out variables and this function itself from the graph.

        This method is called from the :meth:`Variable.unchain_backward`
        method.

        """
        for y in self.outputs:
            y_ref = y()
            if y_ref is not None:
                y_ref.creator = None
        self.inputs = None
    def add_hook(self, hook, name=None):
        """Registers the function hook.

        Args:
            hook(~chainer.function.FunctionHook): Function hook to be
                registered.
            name(str): Name of the function hook. The name must be unique
                among function hooks registered to the function. If ``None``,
                the default name of the function hook is used.

        """
        if not isinstance(hook, FunctionHook):
            raise TypeError('Hook must be a FunctionHook')
        if name is None:
            name = hook.name
        if name in self.local_function_hooks:
            raise KeyError('Hook %s already exists' % name)
        self.local_function_hooks[name] = hook
    def delete_hook(self, name):
        """Unregisters the function hook.

        Args:
            name(str): The name of the function hook to be unregistered.

        """
        del self.local_function_hooks[name]
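To illustrate how the methods above fit together, the following is a minimal sketch of a user-defined function that overrides check_type_forward, forward_cpu and backward_cpu. The ``Square`` class is hypothetical and CPU-only; it is not part of this module:

    import numpy

    import chainer
    from chainer.utils import type_check


    class Square(chainer.Function):

        """Element-wise square (illustrative example)."""

        def check_type_forward(self, in_types):
            type_check.expect(in_types.size() == 1)
            type_check.expect(in_types[0].dtype.kind == 'f')

        def forward_cpu(self, inputs):
            x, = inputs
            return x * x,              # the return value must be a tuple

        def backward_cpu(self, inputs, grad_outputs):
            x, = inputs
            gy, = grad_outputs
            return 2 * x * gy,         # gradient with respect to the input


    x = chainer.Variable(numpy.array([1., 2., 3.], dtype=numpy.float32))
    y = Square()(x)                    # __call__ runs forward and builds the graph
    y.grad = numpy.ones_like(y.data)
    y.backward()                       # x.grad is now [2., 4., 6.]

Because a function instance cannot be reused (see the note in :class:`Function`), a fresh ``Square()`` is created for each application.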
class FunctionHook(object):

    """Base class of hooks for Functions.

    :class:`~chainer.function.FunctionHook` is a callback object
    that is registered to :class:`~chainer.Function`.
    Registered function hooks are invoked before and after
    forward and backward operations of each function.

    Function hooks that derive from :class:`FunctionHook` are required
    to implement four methods:
    :meth:`~chainer.function.FunctionHook.forward_preprocess`,
    :meth:`~chainer.function.FunctionHook.forward_postprocess`,
    :meth:`~chainer.function.FunctionHook.backward_preprocess`, and
    :meth:`~chainer.function.FunctionHook.backward_postprocess`.
    By default, these methods do nothing.

    Specifically, when the :meth:`~chainer.Function.__call__`
    method of some function is invoked,
    :meth:`~chainer.function.FunctionHook.forward_preprocess`
    (resp. :meth:`~chainer.function.FunctionHook.forward_postprocess`)
    of all function hooks registered to this function are called before
    (resp. after) forward propagation.

    Likewise, when :meth:`~chainer.Variable.backward` of some
    :class:`~chainer.Variable` is invoked,
    :meth:`~chainer.function.FunctionHook.backward_preprocess`
    (resp. :meth:`~chainer.function.FunctionHook.backward_postprocess`)
    of all function hooks registered to the function
    which holds this variable as a gradient are called before
    (resp. after) backward propagation.

    There are two ways to register :class:`~chainer.function.FunctionHook`
    objects to :class:`~chainer.Function` objects.

    The first one is to use the ``with`` statement. Function hooks registered
    in this way apply to all functions applied within the ``with`` statement
    and are unregistered at the end of the ``with`` statement.

    .. admonition:: Example

        The following code is a simple example in which
        we measure the elapsed time of a part of the forward propagation
        procedure with :class:`~chainer.function_hooks.TimerHook`, which is
        a subclass of :class:`~chainer.function.FunctionHook`.

        >>> from chainer import function_hooks
        >>> class Model(chainer.Chain):
        ...     def __call__(self, x1):
        ...         return F.exp(self.l(x1))
        >>> model1 = Model(l=L.Linear(10, 10))
        >>> model2 = Model(l=L.Linear(10, 10))
        >>> x = chainer.Variable(np.zeros((1, 10), 'f'))
        >>> with chainer.function_hooks.TimerHook() as m:
        ...     _ = model1(x)
        ...     y = model2(x)
        ...     print("Total time : " + str(m.total_time()))
        ...     model3 = Model(l=L.Linear(10, 10))
        ...     z = model3(y)  # doctest:+ELLIPSIS
        Total time : ...

        In this example, we measure the elapsed times for each forward
        propagation of all functions in ``model1`` and ``model2``
        (specifically, :class:`~chainer.functions.LinearFunction` and
        :class:`~chainer.functions.Exp` of ``model1`` and ``model2``).
        Note that ``model3`` is not a target of measurement
        as :class:`~chainer.function_hooks.TimerHook` is unregistered
        before forward propagation of ``model3``.

    .. note::

       Chainer stores the dictionary of registered function hooks
       as a thread-local object. So, the registered function hooks
       differ depending on the thread.

    The other one is to register directly to a :class:`~chainer.Function`
    object with the :meth:`~chainer.Function.add_hook` method.
    Function hooks registered in this way can be removed by the
    :meth:`~chainer.Function.delete_hook` method.
    Contrary to the former registration method, function hooks are registered
    only to the function on which :meth:`~chainer.Function.add_hook` is
    called.

    Args:
        name(str): Name of this function hook.

    """

    name = 'FunctionHook'

    def __enter__(self):
        function_hooks = chainer.get_function_hooks()
        if self.name in function_hooks:
            raise KeyError('hook %s already exists' % self.name)
        function_hooks[self.name] = self
        return self

    def __exit__(self, *_):
        del chainer.get_function_hooks()[self.name]

    # forward
    def forward_preprocess(self, function, in_data):
        """Callback function invoked before forward propagation.

        Args:
            function(~chainer.Function): Function object to which
                the function hook is registered.
            in_data(tuple of numpy.ndarray or tuple of cupy.ndarray):
                Input data of forward propagation.

        """
        pass
    def forward_postprocess(self, function, in_data):
        """Callback function invoked after forward propagation.

        Args:
            function(~chainer.Function): Function object to which
                the function hook is registered.
            in_data(tuple of numpy.ndarray or tuple of cupy.ndarray):
                Input data of forward propagation.

        """
        pass
    # backward
    def backward_preprocess(self, function, in_data, out_grad):
        """Callback function invoked before backward propagation.

        Args:
            function(~chainer.Function): Function object to which
                the function hook is registered.
            in_data(tuple of numpy.ndarray or tuple of cupy.ndarray):
                Input data of forward propagation.
            out_grad(tuple of numpy.ndarray or tuple of cupy.ndarray):
                Gradient data of backward propagation.

        """
        pass
    def backward_postprocess(self, function, in_data, out_grad):
        """Callback function invoked after backward propagation.

        Args:
            function(~chainer.Function): Function object to which
                the function hook is registered.
            in_data(tuple of numpy.ndarray or tuple of cupy.ndarray):
                Input data of forward propagation.
            out_grad(tuple of numpy.ndarray or tuple of cupy.ndarray):
                Gradient data of backward propagation.

        """
        pass
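As a closing sketch, the callbacks above can be combined into a small custom hook. The ``PrintHook`` class and its messages are illustrative only, not part of Chainer; both registration styles described in the class docstring are shown:

    import numpy

    import chainer
    import chainer.functions as F
    from chainer.function import FunctionHook


    class PrintHook(FunctionHook):

        """Illustrative hook that reports each function application."""

        name = 'PrintHook'

        def forward_preprocess(self, function, in_data):
            print('forward of %s on %d input(s)'
                  % (function.label, len(in_data)))

        def backward_postprocess(self, function, in_data, out_grad):
            print('backward of %s finished' % function.label)


    x = chainer.Variable(numpy.zeros((1, 3), dtype=numpy.float32))

    # Thread-local registration: the hook applies to every function
    # applied inside the ``with`` block.
    with PrintHook():
        y = F.Identity()(x)

    # Per-function registration via add_hook / delete_hook.
    f = F.Identity()
    f.add_hook(PrintHook())
    z = f(x)
    f.delete_hook('PrintHook')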