# Source code for chainer.functions.array.im2col

import numpy

from chainer import cuda
from chainer import function

from chainer.utils.conv import col2im_cpu
from chainer.utils.conv import col2im_gpu
from chainer.utils.conv import im2col_cpu
from chainer.utils.conv import im2col_gpu
from chainer.utils import type_check


def _pair(x):
    if hasattr(x, '__getitem__'):
        return x
    return x, x


class Im2Col(function.Function):

    """Function that unfolds filter-sized patches of a 4-D array.

    ``ksize``, ``stride``, ``pad`` and ``dilate`` are each normalized with
    ``_pair``: a non-indexable value is used for both spatial axes, while
    an indexable value is unpacked into exactly two components.
    """

    def __init__(self, ksize, stride, pad, cover_all, dilate):
        # Each geometry argument is split into its two per-axis components.
        self.kh, self.kw = _pair(ksize)
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)
        self.dy, self.dx = _pair(dilate)

        self.cover_all = cover_all

    def check_type_forward(self, in_types):
        """Require exactly one floating-point input with four dimensions."""
        type_check.expect(in_types.size() == 1)

        x_type = in_types[0]
        type_check.expect(
            x_type.dtype.kind == 'f',
            x_type.ndim == 4,
        )

    def forward(self, inputs):
        """Unfold the input and merge channel and kernel axes into one.

        The conv utility yields a 6-D array
        ``(n, c, kh, kw, out_h, out_w)``; it is reshaped to
        ``(n, c * kh * kw, out_h, out_w)`` and returned as a 1-tuple.
        """
        (x,) = inputs
        # Choose the kernel matching the array module that owns ``x``.
        on_cpu = cuda.get_array_module(x) == numpy
        unfold = im2col_cpu if on_cpu else im2col_gpu
        col = unfold(
            x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
        batch, channels, kh, kw, out_h, out_w = col.shape
        return col.reshape(batch, channels * kh * kw, out_h, out_w),

    def backward(self, inputs, grad_outputs):
        """Fold the output gradient back to the input's spatial size.

        Returns a 1-tuple holding the gradient with respect to the input.
        """
        (x,) = inputs
        (gy,) = grad_outputs

        batch, _, out_h, out_w = gy.shape
        _, channels, height, width = x.shape
        # Undo the flattening done in ``forward`` so that col2im receives
        # the 6-D column layout again.
        col = gy.reshape(batch, channels, self.kh, self.kw, out_h, out_w)

        on_cpu = cuda.get_array_module(x) == numpy
        fold = col2im_cpu if on_cpu else col2im_gpu
        gx = fold(
            col, self.sy, self.sx, self.ph, self.pw, height, width,
            self.dy, self.dx)
        return gx,


def im2col(x, ksize, stride=1, pad=0, cover_all=False, dilate=1):
    """Extract patches from an image based on the filter.

    This function rearranges patches of an image and puts them in the
    channel dimension of the output.

    Patches are extracted at positions shifted by multiples of ``stride`` from
    the first position ``-pad`` for each spatial axis.
    The right-most (or bottom-most) patches do not run over the padded spatial
    size.

    Notation used below:

    - :math:`n` is the batch size.
    - :math:`c` is the number of the input channels.
    - :math:`h` and :math:`w` are the height and width of the input image,
      respectively.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters,
      respectively.
    - :math:`s_Y` and :math:`s_X` are the strides of the filter.
    - :math:`p_H` and :math:`p_W` are the spatial padding sizes.
    - :math:`d_Y` and :math:`d_X` are the dilation factors of filter
      application.

    The output size :math:`(h_O, w_O)` is determined by the following
    equations when ``cover_all = False``:

    .. math::

       h_O &= (h + 2p_H - k_H - (k_H - 1) * (d_Y - 1)) / s_Y + 1,\\\\
       w_O &= (w + 2p_W - k_W - (k_W - 1) * (d_X - 1)) / s_X + 1.

    When ``cover_all = True``, the output size is determined by
    the following equations:

    .. math::

       h_O &= (h + 2p_H - k_H - (k_H - 1) * (d_Y - 1) + s_Y - 1) / s_Y + 1,\\\\
       w_O &= (w + 2p_W - k_W - (k_W - 1) * (d_X - 1) + s_X - 1) / s_X + 1.


    Args:
        x (~chainer.Variable): Input variable of shape :math:`(n, c, h, w)`.
        ksize (int or pair of ints): Size of filters (a.k.a. kernels).
            ``ksize=k`` and ``ksize=(k, k)`` are equivalent.
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        cover_all (bool): If ``True``, all spatial locations are rearranged
            into some output pixels. It may make the output size larger.
        dilate (int or pair of ints): Dilation factor of filter applications.
            ``dilate=d`` and ``dilate=(d, d)`` are equivalent.

    Returns:
        ~chainer.Variable:
        Output variable whose shape is
        :math:`(n, c \\cdot k_H \\cdot k_W, h_O, w_O)`

    """
    # A fresh function object is built per call because it carries the
    # geometry parameters as instance state.
    return Im2Col(ksize, stride, pad, cover_all, dilate)(x)