Source code for chainer.functions.noise.simplified_dropconnect

import numpy

import chainer
from chainer import cuda
from chainer import function
from chainer.utils import type_check


def _as_mat(x):
    if x.ndim == 2:
        return x
    return x.reshape(len(x), -1)
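
# A minimal sketch of what ``_as_mat`` does (shapes are hypothetical, for
# illustration only): trailing dimensions are flattened so that each row
# corresponds to one sample.
#
#     >>> x = numpy.zeros((8, 3, 2, 2))
#     >>> _as_mat(x).shape
#     (8, 12)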


def _matmul(a, b, xp):
    if xp is numpy:
        # numpy.matmul is not available until numpy 1.10,
        # so we use numpy.einsum instead.
        return xp.einsum('ijk,ikl->ijl', a, b)
    else:
        return xp.matmul(a, b)
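
# A small sanity check (illustrative only): on numpy versions that do have
# matmul, the einsum path computes the same batched product.
#
#     >>> a = numpy.arange(24.).reshape(2, 3, 4)
#     >>> b = numpy.arange(40.).reshape(2, 4, 5)
#     >>> numpy.allclose(_matmul(a, b, numpy), numpy.matmul(a, b))
#     True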


class SimplifiedDropconnect(function.Function):

    """Linear unit regularized by simplified dropconnect."""

    def __init__(self, ratio, mask=None):
        self.ratio = ratio
        self.mask = mask

    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]

        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim >= 2,
            w_type.ndim == 2,
            type_check.prod(x_type.shape[1:]) == w_type.shape[1],
        )
        if n_in.eval() == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[0],
            )
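
    # For example (hypothetical shapes): ``x`` of shape ``(n, 2, 5)`` with
    # ``W`` of shape ``(M, 10)`` passes the check above, since
    # prod((2, 5)) == 10; ``b``, if given, must have shape ``(M,)``.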

    def forward(self, inputs):
        scale = inputs[1].dtype.type(1. / (1 - self.ratio))
        xp = cuda.get_array_module(*inputs)
        mask_shape = (inputs[0].shape[0], inputs[1].shape[0],
                      inputs[1].shape[1])
        if self.mask is None:
            # Sample a boolean keep-mask of shape (n, M, N); each element
            # is kept with probability 1 - ratio.
            if xp is numpy:
                self.mask = xp.random.rand(*mask_shape) >= self.ratio
            else:
                # cupy.random.rand accepts a dtype argument;
                # float32 is sufficient for the comparison.
                self.mask = xp.random.rand(*mask_shape,
                                           dtype=numpy.float32) >= self.ratio
        elif isinstance(self.mask, chainer.Variable):
            self.mask = self.mask.data

        x = _as_mat(inputs[0])
        # Per-sample masked and rescaled weights, shape (n, M, N).
        W = inputs[1] * scale * self.mask

        # Batched matrix-vector product: y[i] = W[i] @ x[i]  (ijk,ik->ij).
        y = _matmul(W, x[:, :, None], xp)
        y = y.reshape(y.shape[0], y.shape[1]).astype(x.dtype, copy=False)

        if len(inputs) == 3:
            b = inputs[2]
            y += b
        return y,
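
    # A tiny numeric sketch of forward (illustrative only, fixed all-ones
    # mask): with ratio 0.5 the kept weights are scaled by 2, so all-ones
    # ``x`` of shape (2, 3) and ``W`` of shape (4, 3) give outputs of
    # 2 * 3 = 6.
    #
    #     >>> x = numpy.ones((2, 3), dtype=numpy.float32)
    #     >>> W = numpy.ones((4, 3), dtype=numpy.float32)
    #     >>> mask = numpy.ones((2, 4, 3), dtype=numpy.float32)
    #     >>> f = SimplifiedDropconnect(0.5, mask)
    #     >>> y, = f.forward((x, W))
    #     >>> float(y[0, 0])
    #     6.0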

    def backward(self, inputs, grad_outputs):
        scale = inputs[1].dtype.type(1. / (1 - self.ratio))
        x = _as_mat(inputs[0])
        W = inputs[1] * scale * self.mask
        gy = grad_outputs[0]
        xp = cuda.get_array_module(*inputs)

        # gx[i] = gy[i] @ W[i]  (ij,ijk->ik), reshaped to the input shape.
        gx = _matmul(gy[:, None, :], W, xp).reshape(inputs[0].shape)
        gx = gx.astype(x.dtype, copy=False)

        # gW accumulates the masked outer products over the minibatch
        # (ij,ik,ijk->jk), rescaled by the keep-probability factor.
        gW = (gy[:, :, None] * x[:, None, :] * self.mask).sum(0) * scale
        gW = gW.astype(W.dtype, copy=False)

        if len(inputs) == 3:
            gb = gy.sum(0)
            return gx, gW, gb
        else:
            return gx, gW
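
    # A quick gradient shape check (illustrative only, fixed mask):
    # ``gx`` matches the input shape and ``gW`` the weight shape.
    #
    #     >>> x = numpy.ones((2, 3), dtype=numpy.float32)
    #     >>> W = numpy.ones((4, 3), dtype=numpy.float32)
    #     >>> mask = numpy.ones((2, 4, 3), dtype=numpy.float32)
    #     >>> gy = numpy.ones((2, 4), dtype=numpy.float32)
    #     >>> f = SimplifiedDropconnect(0.5, mask)
    #     >>> gx, gW = f.backward((x, W), (gy,))
    #     >>> gx.shape, gW.shape
    #     ((2, 3), (4, 3))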


def simplified_dropconnect(x, W, b=None, ratio=.5, train=True, mask=None):
    """Linear unit regularized by simplified dropconnect.

    Simplified dropconnect drops elements of the weight matrix randomly
    with probability ``ratio`` and scales the remaining elements by factor
    ``1 / (1 - ratio)``. Which elements are dropped is sampled
    independently for each example in the minibatch.

    It accepts two or three arguments: an input minibatch ``x``, a weight
    matrix ``W``, and optionally a bias vector ``b``. It computes
    :math:`Y = xW^\\top + b`.

    In testing mode, zero is used as the simplified dropconnect ratio
    instead of ``ratio``.

    Notice:
        This implementation cannot be used to reproduce the results of the
        paper, because it differs from the original method: the original
        version samples from a Gaussian distribution before applying the
        activation function, whereas this implementation averages before
        the activation.

    Args:
        x (chainer.Variable or :class:`numpy.ndarray` or cupy.ndarray):
            Input variable. Its first dimension ``n`` is assumed to be the
            *minibatch dimension*. The other dimensions are treated as one
            concatenated dimension whose size must be ``N``.
        W (~chainer.Variable): Weight variable of shape ``(M, N)``.
        b (~chainer.Variable): Bias variable (optional) of shape ``(M,)``.
        ratio (float): Dropconnect ratio.
        train (bool): If ``True``, executes simplified dropconnect.
            Otherwise, this function works as a plain linear function.
        mask (None or chainer.Variable or :class:`numpy.ndarray` or
            cupy.ndarray):
            If ``None``, a randomized dropconnect mask is generated.
            Otherwise, the mask must be an ``(n, M, N)`` shaped array and
            is used as the dropconnect mask. The main purpose of this
            option is debugging.

    Returns:
        ~chainer.Variable: Output variable.

    .. seealso:: :class:`~chainer.links.Dropconnect`

    .. seealso::
        Li, W., Matthew Z., Sixin Z., Yann L., Rob F. (2013).
        Regularization of Neural Network using DropConnect.
        International Conference on Machine Learning.
        `URL <http://cs.nyu.edu/~wanli/dropc/>`_

    """
    if not train:
        ratio = 0
    if b is None:
        return SimplifiedDropconnect(ratio, mask)(x, W)
    else:
        return SimplifiedDropconnect(ratio, mask)(x, W, b)
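

# A minimal usage sketch (shapes are illustrative; ``mask`` is omitted,
# so a fresh random mask is drawn):
#
#     >>> x = numpy.random.rand(5, 10).astype(numpy.float32)
#     >>> W = numpy.random.rand(3, 10).astype(numpy.float32)
#     >>> b = numpy.random.rand(3).astype(numpy.float32)
#     >>> y = simplified_dropconnect(x, W, b, ratio=0.5)
#     >>> y.shape
#     (5, 3)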