Source code for chainer.gradient_check

import warnings

import numpy
import six

from chainer import cuda
from chainer import function
from chainer.functions.math import identity
from chainer import testing
from chainer import variable


def _copy_arrays(xs):
    # Copy each array with the array module (NumPy or CuPy) that matches it.
    xp = cuda.get_array_module(*xs)
    return [xp.copy(x) for x in xs]


def numerical_grad(f, inputs, grad_outputs, eps=1e-3):
    """Computes numerical gradient by finite differences.

    This function is used to implement gradient check. For usage example, see
    unit tests of :mod:`chainer.functions`.

    Args:
        f (function): Python function with no arguments that runs forward
            computation and returns the result.
        inputs (tuple of arrays): Tuple of arrays that should be treated as
            inputs. Each element of them is slightly modified to realize
            numerical gradient by finite differences.
        grad_outputs (tuple of arrays): Tuple of arrays that are treated as
            output gradients.
        eps (float): Epsilon value of finite differences.

    Returns:
        tuple: Numerical gradient arrays corresponding to ``inputs``.

    """
    assert eps > 0
    inputs = tuple(inputs)
    grad_outputs = tuple(grad_outputs)
    gpu = any(isinstance(x, cuda.ndarray) for x in inputs + grad_outputs)
    cpu = any(isinstance(x, numpy.ndarray) for x in inputs + grad_outputs)

    if gpu and cpu:
        raise RuntimeError(
            'Do not mix GPU and CPU arrays in `numerical_grad`')

    if gpu:
        xp = cuda.cupy
    else:
        xp = numpy
    grads = [xp.zeros_like(x) for x in inputs]

    # Test scripts always run in single thread or multi-process.
    prev_mode = function.Function.type_check_enable  # not thread safe
    try:
        function.Function.type_check_enable = False
        for x, gx in six.moves.zip(inputs, grads):
            for i in numpy.ndindex(x.shape):
                orig = x[i].copy()  # hold original value
                x[i] = orig + eps
                ys1 = _copy_arrays(f())
                x[i] = orig - eps
                ys2 = _copy_arrays(f())
                x[i] = orig
                for y1, y2, gy in six.moves.zip(ys1, ys2, grad_outputs):
                    if gy is not None:
                        dot = ((y1 - y2) * gy).sum()
                        gx[i] += dot / (2 * eps)
    finally:
        function.Function.type_check_enable = prev_mode  # not thread safe

    return grads
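

# Illustrative sketch (not part of the original module): a minimal example of
# how `numerical_grad` can be used directly. The helper name
# `_example_numerical_grad` and the arrays below are hypothetical and exist
# only for demonstration. The closure `f` recomputes the forward pass from
# the in-place perturbed input, which is what `numerical_grad` expects.
def _example_numerical_grad():
    x = numpy.asarray([1., 2., 3.])
    gy = numpy.ones_like(x)  # upstream gradient dL/dy

    def f():
        # Forward computation reading the (perturbed) `x` in place.
        return x ** 2,

    gx, = numerical_grad(f, (x,), (gy,))
    # Central differences (f(x + eps) - f(x - eps)) / (2 * eps) should be
    # close to the analytical gradient 2 * x.
    numpy.testing.assert_allclose(gx, 2 * x, atol=1e-3)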


def assert_allclose(x, y, atol=1e-5, rtol=1e-4, verbose=True):
    """Asserts if some corresponding element of x and y differs too much.

    This function can handle both CPU and GPU arrays simultaneously.

    Args:
        x: Left-hand-side array.
        y: Right-hand-side array.
        atol (float): Absolute tolerance.
        rtol (float): Relative tolerance.
        verbose (bool): If ``True``, it outputs verbose messages on error.

    """
    warnings.warn(
        'chainer.gradient_check.assert_allclose is deprecated. '
        'Use chainer.testing.assert_allclose instead.',
        DeprecationWarning)
    testing.assert_allclose(x, y, atol, rtol, verbose)


def _as_tuple(x):
    if isinstance(x, tuple):
        return x
    elif isinstance(x, list):
        return tuple(x)
    else:
        return x,


def check_backward(func, x_data, y_grad, params=(),
                   eps=1e-3, atol=1e-5, rtol=1e-4, no_grads=None, dtype=None):
    """Test backward procedure of a given function.

    This function automatically checks the backward process of a given
    function. For example, when you have a :class:`~chainer.Function` class
    ``MyFunc`` that gets two arguments and returns one value, you can write
    its test like this::

    >> def test_my_func(self):
    >>     func = MyFunc()
    >>     x1_data = xp.array(...)
    >>     x2_data = xp.array(...)
    >>     gy_data = xp.array(...)
    >>     check_backward(func, (x1_data, x2_data), gy_data)

    This method creates :class:`~chainer.Variable` objects with ``x_data``
    and calls ``func`` with the :class:`~chainer.Variable` s to get its
    result as :class:`~chainer.Variable`. Then, it sets the ``y_grad`` array
    to the ``grad`` attribute of the result and calls the ``backward`` method
    to get gradients of the inputs. To check correctness of the gradients,
    the function calls :func:`numerical_grad` to calculate the gradients
    numerically and compares them with the backpropagated gradients using
    :func:`chainer.testing.assert_allclose`. If input objects (``x1_data``
    and/or ``x2_data`` in this example) represent integer variables, their
    gradients are ignored.

    You can simplify a test when ``MyFunc`` gets only one argument::

    >> check_backward(func, x1_data, gy_data)

    If ``MyFunc`` is a loss function which returns a zero-dimensional array,
    pass ``None`` to ``gy_data``. In this case, ``1`` is set to the ``grad``
    attribute of the result::

    >> check_backward(my_loss_func, (x1_data, x2_data), None)

    If ``MyFunc`` returns multiple outputs, pass all gradients for outputs
    as a tuple::

    >> gy1_data = xp.array(...)
    >> gy2_data = xp.array(...)
    >> check_backward(func, x1_data, (gy1_data, gy2_data))

    You can also test a :class:`~chainer.Link`. To check gradients of
    parameters of the link, set a tuple of the parameters to the ``params``
    argument::

    >> check_backward(my_link, (x1_data, x2_data), gy_data,
    >>                (my_link.W, my_link.b))

    Note that ``params`` are not ``ndarray`` s, but
    :class:`~chainer.Variable` s.

    Function objects are acceptable as the ``func`` argument::

    >> check_backward(lambda x1, x2: f(x1, x2),
    >>                (x1_data, x2_data), gy_data)

    .. note::

       ``func`` is called many times to get numerical gradients for all
       inputs. This function doesn't work correctly when ``func`` behaves
       randomly, as it would get different gradients on each call.

    Args:
        func (callable): A function which gets :class:`~chainer.Variable` s
            and returns :class:`~chainer.Variable` s. ``func`` must return
            a tuple of :class:`~chainer.Variable` s or one
            :class:`~chainer.Variable`. You can use a
            :class:`~chainer.Function` object, a :class:`~chainer.Link`
            object or a function satisfying the condition.
        x_data (ndarray or tuple of ndarrays): A set of ``ndarray`` s to be
            passed to ``func``. If ``x_data`` is one ``ndarray`` object, it
            is treated as ``(x_data,)``.
        y_grad (ndarray or tuple of ndarrays or None): A set of ``ndarray`` s
            representing gradients of return-values of ``func``. If
            ``y_grad`` is one ``ndarray`` object, it is treated as
            ``(y_grad,)``. If ``func`` is a loss-function, ``y_grad`` should
            be set to ``None``.
        params (~chainer.Variable or tuple of ~chainer.Variable): A set of
            :class:`~chainer.Variable` s whose gradients are checked. When
            ``func`` is a :class:`~chainer.Link` object, set its parameters
            as ``params``. If ``params`` is one :class:`~chainer.Variable`
            object, it is treated as ``(params,)``.
        eps (float): Epsilon value to be passed to :func:`numerical_grad`.
        atol (float): Absolute tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        rtol (float): Relative tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        no_grads (list of bool): Flags to skip variables for gradient
            assertion. It should have the same length as ``x_data``.
        dtype (~numpy.dtype): ``x_data`` and ``y_grad`` are cast to this
            dtype when calculating numerical gradients. Only float types and
            ``None`` are allowed.

    See:
       :func:`numerical_grad`

    """
    x_data = _as_tuple(x_data)
    if y_grad is not None:
        y_grad = _as_tuple(y_grad)
    params = _as_tuple(params)

    xs = [variable.Variable(x) for x in x_data]
    y = func(*xs)
    y = _as_tuple(y)

    # All creators of `y` need to be the same because we only call
    # `y[0].backward` to call `backward` method of the creator.
    # To do so we need to insert a dummy function `Ident` to the
    # computational graph.
    # Note that `func` may not be a `Function` object.
    y = identity.Identity()(*y)
    y = _as_tuple(y)

    if y_grad is not None:
        if len(y) != len(y_grad):
            raise ValueError(
                '`y_grad` must have the same length as the output values')
        for iy, igy in six.moves.zip(y, y_grad):
            iy.grad = igy
    else:
        if len(y) != 1:
            raise ValueError(
                'When `y_grad` is `None`, the function must return a '
                'zero-dimensional array')
        y_grad = (1,)

    # We only need to call `backward` for one result `Variable`.
    # `Variable.backward` method calls `Function.backward` of its creator.
    y[0].backward()

    if dtype is None:
        casted_xs = [variable.Variable(x) for x in x_data]
    else:
        if numpy.dtype(dtype).kind != 'f':
            raise ValueError('`dtype` is allowed only float type')
        if len(params) > 0:
            raise ValueError(
                '`dtype` is available only if `params` is empty')
        casted_xs = [variable.Variable(x.astype(dtype, copy=False)
                                       if x.dtype.kind == 'f' else x)
                     for x in x_data]

    def f():
        ys = func(*casted_xs)
        ys = _as_tuple(ys)
        return tuple(y.data for y in ys)

    if no_grads is None:
        no_grads = [x.dtype.kind != 'f' for x in xs]
    else:
        if len(no_grads) != len(xs):
            raise ValueError(
                'Length of `no_grads` and `xs` should be the same.')

    for skip, x, cx in six.moves.zip(no_grads, xs, casted_xs):
        if skip:
            assert x.grad is None
            continue
        gx, = numerical_grad(f, (cx.data,), y_grad, eps=eps)
        testing.assert_allclose(gx, x.grad, atol=atol, rtol=rtol)
        if dtype is None:
            assert gx.dtype == x.grad.dtype
        else:
            assert gx.dtype.kind == 'f' and gx.dtype == dtype

    for p in params:
        gp, = numerical_grad(f, (p.data,), y_grad, eps=eps)
        testing.assert_allclose(gp, p.grad, atol=atol, rtol=rtol)
        assert gp.dtype is p.grad.dtype
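

# Illustrative sketch (not part of the original module): a minimal end-to-end
# use of `check_backward` on a hand-written elementwise-multiply Function.
# `_MulTwo` and `_example_check_backward` are hypothetical names used only
# for demonstration; any Function with a correct `backward` would do.
class _MulTwo(function.Function):

    def forward(self, inputs):
        x1, x2 = inputs
        return x1 * x2,

    def backward(self, inputs, grad_outputs):
        x1, x2 = inputs
        gy, = grad_outputs
        # Analytical gradients of y = x1 * x2.
        return gy * x2, gy * x1


def _example_check_backward():
    x1 = numpy.random.uniform(-1, 1, (3, 2)).astype(numpy.float32)
    x2 = numpy.random.uniform(-1, 1, (3, 2)).astype(numpy.float32)
    gy = numpy.random.uniform(-1, 1, (3, 2)).astype(numpy.float32)
    # Compares the analytical gradients from `_MulTwo.backward` against
    # numerical gradients; an AssertionError is raised on mismatch.
    check_backward(_MulTwo(), (x1, x2), gy, eps=1e-3, atol=1e-3, rtol=1e-3)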