import numpy
from chainer import cuda
from chainer import function
from chainer.utils import conv
from chainer.utils import type_check
def _pair(x):
if hasattr(x, '__getitem__'):
return x
return x, x
def _matmul(a, b, xp):
if xp is numpy:
# numpy 1.9 does not support matmul.
# So we use numpy.einsum instead of numpy.matmul.
return xp.einsum('ijk,ikl->ijl', a, b)
else:
return xp.matmul(a, b)
class DepthwiseConvolution2D(function.Function):
def __init__(self, stride=1, pad=0):
self.sy, self.sx = _pair(stride)
self.ph, self.pw = _pair(pad)
def check_type_forward(self, in_types):
n_in = in_types.size()
type_check.expect(2 <= n_in, n_in <= 3)
x_type = in_types[0]
w_type = in_types[1]
type_check.expect(
x_type.dtype.kind == 'f',
w_type.dtype.kind == 'f',
x_type.ndim == 4,
w_type.ndim == 4,
x_type.shape[1] == w_type.shape[1],
)
if n_in.eval() == 3:
b_type = in_types[2]
type_check.expect(
b_type.dtype == x_type.dtype,
b_type.ndim == 1,
b_type.shape[0] == w_type.shape[0] * w_type.shape[1],
)
def forward(self, inputs):
x, W = inputs[:2]
b = inputs[2] if len(inputs) == 3 else None
kh, kw = W.shape[2:]
xp = cuda.get_array_module(*x)
if xp is numpy:
self.col = conv.im2col_cpu(
x, kh, kw, self.sy, self.sx, self.ph, self.pw)
else:
self.col = conv.im2col_gpu(
x, kh, kw, self.sy, self.sx, self.ph, self.pw)
B, C, KY, KX, IY, IX = self.col.shape
D = W.shape[0] # (D, C, KY, KX)
c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
.reshape((C, B * IY * IX, KY * KX))
w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
# (C, B*IY*IX, KY*KX), (C, KY*KX, D)-> (C, B*IY*IX, D)
y = _matmul(c_, w_, xp).astype(x.dtype, copy=False)
# (C, B*IY*IX, D) -> (B, C*D, IY, IX)
y = y.reshape((C, B, IY * IX, D)).transpose(1, 0, 3, 2) \
.reshape((B, C * D, IY, IX))
if b is not None:
y += b[None, :, None, None]
return y,
def backward(self, inputs, grad_outputs):
x, W = inputs[:2]
b = inputs[2] if len(inputs) == 3 else None
gy = grad_outputs[0]
h, w = x.shape[2:]
xp = cuda.get_array_module(*x)
B, C, KY, KX, IY, IX = self.col.shape
D = W.shape[0]
# (B, C*D, IY, IX) -> (C, D, B*IY*IX, D)
gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
.reshape((C, D, B * IY * IX))
c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
.reshape((C, B * IY * IX, KY * KX))
# (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
gW_ = _matmul(gy_, c_, xp)
gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
gW = gW.astype(W.dtype, copy=False)
w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
# (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
gcol = gcol.astype(x.dtype, copy=False)
gcol = xp.rollaxis(gcol, 3)
if xp is numpy:
gx = conv.col2im_cpu(gcol, self.sy, self.sx,
self.ph, self.pw, h, w)
else:
gx = conv.col2im_gpu(gcol, self.sy, self.sx,
self.ph, self.pw, h, w)
if b is None:
return gx, gW
else:
gy = xp.rollaxis(gy, 1, 4)
gb = gy.sum(axis=(0, 1, 2))
return gx, gW, gb
[docs]def depthwise_convolution_2d(x, W, b=None, stride=1, pad=0):
"""Two-dimensional depthwise convolution function.
This is an implementation of two-dimensional depthwise convolution.
It takes two or three variables: the input image ``x``, the filter weight
``W``, and optionally, the bias vector ``b``.
Notation: here is a notation for dimensionalities.
- :math:`n` is the batch size.
- :math:`c_I` is the number of the input.
- :math:`c_M` is the channel multiplier.
- :math:`h` and :math:`w` are the height and width of the input image,
respectively.
- :math:`h_O` and :math:`w_O` are the height and width of the output image,
respectively.
- :math:`k_H` and :math:`k_W` are the height and width of the filters,
respectively.
Args:
x (chainer.Variable or :class:`numpy.ndarray` or cupy.ndarray):
Input variable of shape :math:`(n, c_I, h, w)`.
W (~chainer.Variable): Weight variable of shape
:math:`(c_M, c_I, k_H, k_W)`.
b (~chainer.Variable):
Bias variable of length :math:`c_M * c_I` (optional).
stride (int or pair of ints): Stride of filter applications.
``stride=s`` and ``stride=(s, s)`` are equivalent.
pad (int or pair of ints): Spatial padding width for input arrays.
``pad=p`` and ``pad=(p, p)`` are equivalent.
Returns:
~chainer.Variable:
Output variable. Its shape is :math:`(n, c_I * c_M, h_O, w_O)`.
Like ``Convolution2D``, ``DepthwiseConvolution2D`` function computes
correlations between filters and patches of size :math:`(k_H, k_W)` in
``x``.
But unlike ``Convolution2D``, ``DepthwiseConvolution2D`` does not add up
input channels of filters but concatenates them.
For that reason, the shape of outputs of depthwise convolution are
:math:`(n, c_I * c_M, h_O, w_O)`, :math:`c_M` is called channel_multiplier.
:math:`(h_O, w_O)` is determined by the equivalent equation of
``Convolution2D``.
If the bias vector is given, then it is added to all spatial locations of
the output of convolution.
See: `L. Sifre. Rigid-motion scattering for image classification\
<http://www.di.ens.fr/data/publications/papers/phd_sifre.pdf>`_
.. seealso:: :class:`~chainer.links.DepthwiseConvolution2D`
.. admonition:: Example
>>> x = np.random.uniform(0, 1, (2, 3, 4, 7))
>>> W = np.random.uniform(0, 1, (2, 3, 3, 3))
>>> b = np.random.uniform(0, 1, (6,))
>>> y = F.depthwise_convolution_2d(x, W, b)
>>> y.shape
(2, 6, 2, 5)
"""
func = DepthwiseConvolution2D(stride, pad)
if b is None:
return func(x, W)
else:
return func(x, W, b)