Source code for chainer.functions.connection.depthwise_convolution_2d

import numpy

from chainer import cuda
from chainer import function
from chainer.utils import conv
from chainer.utils import type_check


def _pair(x):
    if hasattr(x, '__getitem__'):
        return x
    return x, x


def _matmul(a, b, xp):
    if xp is numpy:
        # numpy 1.9 does not support matmul.
        # So we use numpy.einsum instead of numpy.matmul.
        return xp.einsum('ijk,ikl->ijl', a, b)
    else:
        return xp.matmul(a, b)


class DepthwiseConvolution2D(function.Function):

    def __init__(self, stride=1, pad=0):
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)

    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)

        x_type = in_types[0]
        w_type = in_types[1]
        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 4,
            x_type.shape[1] == w_type.shape[1],
        )

        if n_in.eval() == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[0] * w_type.shape[1],
            )

    def forward(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]

        xp = cuda.get_array_module(*x)
        if xp is numpy:
            self.col = conv.im2col_cpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw)
        else:
            self.col = conv.im2col_gpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw)

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]  # (D, C, KY, KX)
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))

        # (C, B*IY*IX, KY*KX), (C, KY*KX, D)-> (C, B*IY*IX, D)
        y = _matmul(c_, w_, xp).astype(x.dtype, copy=False)

        # (C, B*IY*IX, D) -> (B, C*D, IY, IX)
        y = y.reshape((C, B, IY * IX, D)).transpose(1, 0, 3, 2) \
            .reshape((B, C * D, IY, IX))

        if b is not None:
            y += b[None, :, None, None]
        return y,

    def backward(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        xp = cuda.get_array_module(*x)

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]

        # (B, C*D, IY, IX) -> (C, D, B*IY*IX, D)
        gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
            .reshape((C, D, B * IY * IX))
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
        gW_ = _matmul(gy_, c_, xp)
        gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
        gW = gW.astype(W.dtype, copy=False)

        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
        gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
        gcol = gcol.astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, 3)

        if xp is numpy:
            gx = conv.col2im_cpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)
        else:
            gx = conv.col2im_gpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gy = xp.rollaxis(gy, 1, 4)
            gb = gy.sum(axis=(0, 1, 2))
            return gx, gW, gb


[docs]def depthwise_convolution_2d(x, W, b=None, stride=1, pad=0): """Two-dimensional depthwise convolution function. This is an implementation of two-dimensional depthwise convolution. It takes two or three variables: the input image ``x``, the filter weight ``W``, and optionally, the bias vector ``b``. Notation: here is a notation for dimensionalities. - :math:`n` is the batch size. - :math:`c_I` is the number of the input. - :math:`c_M` is the channel multiplier. - :math:`h` and :math:`w` are the height and width of the input image, respectively. - :math:`h_O` and :math:`w_O` are the height and width of the output image, respectively. - :math:`k_H` and :math:`k_W` are the height and width of the filters, respectively. Args: x (chainer.Variable or :class:`numpy.ndarray` or cupy.ndarray): Input variable of shape :math:`(n, c_I, h, w)`. W (~chainer.Variable): Weight variable of shape :math:`(c_M, c_I, k_H, k_W)`. b (~chainer.Variable): Bias variable of length :math:`c_M * c_I` (optional). stride (int or pair of ints): Stride of filter applications. ``stride=s`` and ``stride=(s, s)`` are equivalent. pad (int or pair of ints): Spatial padding width for input arrays. ``pad=p`` and ``pad=(p, p)`` are equivalent. Returns: ~chainer.Variable: Output variable. Its shape is :math:`(n, c_I * c_M, h_O, w_O)`. Like ``Convolution2D``, ``DepthwiseConvolution2D`` function computes correlations between filters and patches of size :math:`(k_H, k_W)` in ``x``. But unlike ``Convolution2D``, ``DepthwiseConvolution2D`` does not add up input channels of filters but concatenates them. For that reason, the shape of outputs of depthwise convolution are :math:`(n, c_I * c_M, h_O, w_O)`, :math:`c_M` is called channel_multiplier. :math:`(h_O, w_O)` is determined by the equivalent equation of ``Convolution2D``. If the bias vector is given, then it is added to all spatial locations of the output of convolution. See: `L. Sifre. Rigid-motion scattering for image classification\ <http://www.di.ens.fr/data/publications/papers/phd_sifre.pdf>`_ .. seealso:: :class:`~chainer.links.DepthwiseConvolution2D` .. admonition:: Example >>> x = np.random.uniform(0, 1, (2, 3, 4, 7)) >>> W = np.random.uniform(0, 1, (2, 3, 3, 3)) >>> b = np.random.uniform(0, 1, (6,)) >>> y = F.depthwise_convolution_2d(x, W, b) >>> y.shape (2, 6, 2, 5) """ func = DepthwiseConvolution2D(stride, pad) if b is None: return func(x, W) else: return func(x, W, b)