Source code for chainer.links.caffe.caffe_function

import collections
import pkg_resources
import sys
import warnings

import numpy
import six

from chainer import functions
from chainer import link
from chainer import links


def _protobuf3():
    ws = pkg_resources.WorkingSet()
    try:
        ws.require('protobuf>=3.0.0a')
        return True
    except pkg_resources.VersionConflict:
        return False


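# Select the protobuf-generated bindings that match the installed protobuf
# runtime: protobuf>=3 works on both Python 2 and 3, while the bundled
# protobuf2 bindings only work on Python 2.  ``available`` records whether a
# usable module was found.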
if _protobuf3():
    from chainer.links.caffe.protobuf3 import caffe_pb2 as caffe_pb
    available = True

    try:
        # This method is undocumented, but is required to read large model
        # files when a user uses the C++ implementation of protobuf.
        from google.protobuf.pyext import _message
        _message.SetAllowOversizeProtos(True)
    except ImportError:
        pass

elif sys.version_info < (3, 0, 0):
    # caffe_pb2 does not support Py3
    from chainer.links.caffe.protobuf2 import caffe_pb2 as caffe_pb
    available = True
else:
    available = False

if available:
    _type_to_method = {}
    _oldname_to_method = {}

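    # ``_layer`` registers a setup method for a Caffe layer type.  ``typ`` is
    # the new-style (string) layer type and ``oldname`` is the corresponding
    # V1LayerParameter enum name; the same method handles both formats.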
    def _layer(typ, oldname):
        def decorator(meth):
            global _type_to_method
            _type_to_method[typ] = meth
            if oldname is not None:
                typevalue = getattr(caffe_pb.V1LayerParameter, oldname)
                _oldname_to_method[typevalue] = meth
            return meth
        return decorator
else:
    def _layer(typ, oldname):  # fallback
        def decorator(meth):
            return meth
        return decorator


class CaffeFunction(link.Chain):
    """Caffe emulator based on the model file of Caffe.

    Given a protocol buffers file of a Caffe model, this class loads and
    emulates it on :class:`~chainer.Variable` objects. It supports the
    official reference models provided by BVLC.

    .. note::

        protobuf>=3.0.0 is required if you use Python 3 because protobuf 2 is
        not supported on Python 3.

    .. note::

        CaffeFunction ignores the following layers:

        - Layers that CaffeFunction does not support (including data layers)
        - Layers that have no top blobs
        - Layers whose bottom blobs are incomplete (i.e., some or all of them
          are neither given nor computed)

    .. warning::

        It does not offer full compatibility with Caffe. Some layers and
        configurations are not implemented in Chainer yet, though the
        reference models provided by the BVLC team are supported except for
        data layers.

    .. admonition:: Example

        Suppose we want to extract the (unnormalized) log class probability
        of given images using the BVLC reference CaffeNet. The model can be
        downloaded from:

        http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel

        We want to compute the ``fc8`` blob from the ``data`` blob. It is
        simply written as follows::

            # Load the model
            func = CaffeFunction('path/to/bvlc_reference_caffenet.caffemodel')

            # Minibatch of size 10
            x_data = numpy.ndarray((10, 3, 227, 227), dtype=numpy.float32)
            ...  # (Fill the minibatch here)

            # Forward the pre-trained net
            x = Variable(x_data)
            y, = func(inputs={'data': x}, outputs=['fc8'])

        The result ``y`` contains the Variable corresponding to the ``fc8``
        blob. The computational graph is constructed as in a usual forward
        computation in Chainer, so we can run backprop through this
        pre-trained net.

    Args:
        model_path (str): Path to the binary-proto model file of Caffe.

    Attributes:
        forwards (dict): A mapping from layer names to corresponding
            functions.

    """

    def __init__(self, model_path):
        if not available:
            msg = 'CaffeFunction requires protobuf>=3 on Python 3'
            raise RuntimeError(msg)

        super(CaffeFunction, self).__init__()

        net = caffe_pb.NetParameter()
        with open(model_path, 'rb') as model_file:
            net.MergeFromString(model_file.read())

        self.forwards = {}
        self.split_map = {}
        self.layers = []

        if net.layer:
            for layer in net.layer:
                meth = _type_to_method.get(layer.type)
                if meth:
                    meth(self, layer)
                else:
                    warnings.warn(
                        'Skip the layer "%s", since CaffeFunction does not '
                        'support %s layer' % (layer.name, layer.type))
        else:  # v1 format
            for layer in net.layers:
                meth = _oldname_to_method.get(layer.type)
                if meth:
                    meth(self, layer)
                else:
                    warnings.warn(
                        'Skip the layer "%s", since CaffeFunction does not '
                        'support it' % layer.name)

    def __call__(self, inputs, outputs, disable=(), train=True):
        """Executes a sub-network of the network.

        This function acts as an interpreter of the network definition for
        Caffe. On execution, it interprets each layer one by one, and if the
        bottom blobs are already computed, then emulates the layer and stores
        output blobs as :class:`~chainer.Variable` objects.

        Args:
            inputs (dict): A dictionary whose key-value pairs indicate initial
                correspondences between blob names and
                :class:`~chainer.Variable` objects.
            outputs (Iterable): A list of blob names whose corresponding
                :class:`~chainer.Variable` objects are returned.
            disable (Iterable): A list of layer names that will be ignored
                during the forward computation.
            train (bool): If ``True``, this function emulates the TRAIN phase
                of the Caffe layers. Otherwise, it emulates the TEST phase.

        Returns:
            tuple: A tuple of output :class:`~chainer.Variable` objects
            corresponding to elements of the ``outputs`` argument.

        """
        self.train = train
        variables = dict(inputs)
        for func_name, bottom, top in self.layers:
            if (func_name in disable or
                    func_name not in self.forwards or
                    any(blob not in variables for blob in bottom)):
                continue

            func = self.forwards[func_name]
            input_vars = tuple(variables[blob] for blob in bottom)
            output_vars = func(*input_vars)
            if not isinstance(output_vars, collections.Iterable):
                output_vars = output_vars,
            for var, name in zip(output_vars, top):
                variables[name] = var

        self.variables = variables
        return tuple(variables[blob] for blob in outputs)
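
    # A minimal usage sketch of ``__call__`` (the blob and layer names below
    # are hypothetical and depend on the loaded network):
    #
    #     y6, y7 = func(inputs={'data': x},
    #                   outputs=['fc6', 'fc7'],
    #                   disable=['drop6', 'drop7'],
    #                   train=False)
    #
    # All blobs computed during the call remain accessible afterwards through
    # ``func.variables``.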

    def _add_layer(self, layer):
        bottom = []
        for blob_name in layer.bottom:
            bottom.append(self.split_map.get(blob_name, blob_name))
        self.layers.append((layer.name, bottom, list(layer.top)))

    @_layer('Concat', 'CONCAT')
    def _setup_concat(self, layer):
        param = layer.concat_param
        axis = param.axis
        if axis == 1 and param.concat_dim != 1:
            axis = param.concat_dim

        self.forwards[layer.name] = _ListArgumentFunction(
            functions.concat, axis=axis)
        self._add_layer(layer)

    @_layer('Convolution', 'CONVOLUTION')
    def _setup_convolution(self, layer):
        blobs = layer.blobs
        param = layer.convolution_param
        ksize = _get_ksize(param)
        stride = _get_stride(param)
        pad = _get_pad(param)
        num = _get_num(blobs[0])
        channels = _get_channels(blobs[0])

        n_in = channels * param.group
        n_out = num
        func = links.Convolution2D(n_in, n_out, ksize, stride, pad,
                                   nobias=not param.bias_term)
        func.W.data[...] = 0

        part_size = len(blobs[0].data) // param.group
        for i in six.moves.range(param.group):
            in_slice = slice(i * n_in // param.group,
                             (i + 1) * n_in // param.group)
            out_slice = slice(i * n_out // param.group,
                              (i + 1) * n_out // param.group)
            w = func.W.data[out_slice, in_slice]

            data = numpy.array(
                blobs[0].data[i * part_size:(i + 1) * part_size])
            w[:] = data.reshape(w.shape)

        if param.bias_term:
            func.b.data[:] = blobs[1].data

        self.add_link(layer.name, func)
        self.forwards[layer.name] = _CallChildLink(self, layer.name)
        self._add_layer(layer)

    @_layer('Data', 'DATA')
    def _setup_data(self, layer):
        # We silently skip the data layer.
        pass

    @_layer('Dropout', 'DROPOUT')
    def _setup_dropout(self, layer):
        param = layer.dropout_param

        self.forwards[layer.name] = _DropoutFunction(
            self, ratio=param.dropout_ratio)
        self._add_layer(layer)

    @_layer('InnerProduct', 'INNER_PRODUCT')
    def _setup_inner_product(self, layer):
        param = layer.inner_product_param
        bias_term = param.bias_term
        if param.axis != 1:
            raise RuntimeError(
                'Non-default axis in InnerProduct is not supported')

        blobs = layer.blobs
        width, height = _get_width(blobs[0]), _get_height(blobs[0])
        func = links.Linear(width, height, nobias=not bias_term)
        func.W.data.ravel()[:] = blobs[0].data
        if bias_term:
            func.b.data[:] = blobs[1].data

        self.add_link(layer.name, func)
        self.forwards[layer.name] = _CallChildLink(self, layer.name)
        self._add_layer(layer)

    @_layer('LRN', 'LRN')
    def _setup_lrn(self, layer):
        param = layer.lrn_param
        if param.norm_region != param.ACROSS_CHANNELS:
            raise RuntimeError('Within-channel LRN is not supported')

        fwd = _SingleArgumentFunction(
            functions.local_response_normalization,
            n=param.local_size, k=param.k,
            alpha=param.alpha / param.local_size, beta=param.beta)
        self.forwards[layer.name] = fwd
        self._add_layer(layer)

    @_layer('Pooling', 'POOLING')
    def _setup_pooling(self, layer):
        param = layer.pooling_param
        ksize = _get_ksize(param)
        stride = _get_stride(param)
        pad = _get_pad(param)

        if param.pool == param.MAX:
            func = functions.max_pooling_2d
        elif param.pool == param.AVE:
            func = functions.average_pooling_2d
        else:
            raise RuntimeError('Stochastic pooling is not supported')

        fw = _SingleArgumentFunction(func, ksize, stride=stride, pad=pad)
        self.forwards[layer.name] = fw
        self._add_layer(layer)

    @_layer('ReLU', 'RELU')
    def _setup_relu(self, layer):
        slope = layer.relu_param.negative_slope

        if slope != 0:
            fw = _SingleArgumentFunction(functions.leaky_relu, slope=slope)
        else:
            fw = functions.relu

        self.forwards[layer.name] = fw
        self._add_layer(layer)

    @_layer('BatchNorm', None)
    def _setup_batchnorm(self, layer):
        # Get layer parameters.
        blobs = layer.blobs
        param = layer.batch_norm_param
        use_global_stats = param.use_global_stats
        decay = param.moving_average_fraction
        eps = param.eps
        size = int(blobs[0].shape.dim[0])  # Get channel dim from mean blob.

        # Make BatchNormalization link.
        func = links.BatchNormalization(size, decay=decay, eps=eps,
                                        use_gamma=False, use_beta=False)
        func.avg_mean.ravel()[:] = blobs[0].data
        func.avg_var.ravel()[:] = blobs[1].data
        self.add_link(layer.name, func)

        # Add layer.
        fwd = _SingleArgumentFunction(
            _CallChildLink(self, layer.name),
            test=use_global_stats, finetune=False)
        self.forwards[layer.name] = fwd
        self._add_layer(layer)

    @_layer('Eltwise', 'ELTWISE')
    def _setup_eltwise(self, layer):
        # The stable_prod_grad parameter is not supported yet.
        operation = layer.eltwise_param.operation
        coeffs = layer.eltwise_param.coeff or None
        self.forwards[layer.name] = _EltwiseFunction(operation, coeffs)
        self._add_layer(layer)

    @_layer('Scale', None)
    def _setup_scale(self, layer):
        # The following parameters are not supported yet:
        # - negative axis
        # - num_axes
        # - filler
        # - bias_filler

        # Get layer parameters.
        bottom = layer.bottom
        blobs = layer.blobs
        axis = layer.scale_param.axis
        bias_term = layer.scale_param.bias_term

        # Case of only one bottom, where W is a learnt parameter.
        if len(bottom) == 1:
            W_shape = blobs[0].shape.dim
            func = links.scale.Scale(axis, W_shape, bias_term)
            func.W.data.ravel()[:] = blobs[0].data
            if bias_term:
                func.bias.b.data.ravel()[:] = blobs[1].data
        # Case of two bottoms, where W is given as a bottom.
        else:
            shape = blobs[0].shape.dim if bias_term else None
            func = links.scale.Scale(
                axis, bias_term=bias_term, bias_shape=shape)
            if bias_term:
                func.bias.b.data.ravel()[:] = blobs[0].data

        # Add layer.
        self.add_link(layer.name, func)
        self.forwards[layer.name] = _CallChildLink(self, layer.name)
        self._add_layer(layer)

    @_layer('Slice', 'SLICE')
    def _setup_slice(self, layer):
        if layer.slice_param.HasField('axis'):
            axis = layer.slice_param.axis
        elif layer.slice_param.HasField('slice_dim'):
            axis = layer.slice_param.slice_dim
        else:
            axis = 1

        if layer.slice_param.slice_point:
            indices_or_sections = list(layer.slice_param.slice_point)
        else:
            indices_or_sections = len(list(layer.top))

        self.forwards[layer.name] = _SingleArgumentFunction(
            functions.split_axis,
            indices_or_sections=indices_or_sections,
            axis=axis
        )
        self._add_layer(layer)

    @_layer('Softmax', 'SOFTMAX')
    def _setup_softmax(self, layer):
        if layer.softmax_param.axis != 1:
            raise RuntimeError(
                'Softmax along non-channel axis is not supported')

        if layer.softmax_param.engine == 0:  # DEFAULT
            fw = functions.softmax
        elif layer.softmax_param.engine == 1:  # CAFFE
            fw = _SingleArgumentFunction(functions.softmax, use_cudnn=False)
        elif layer.softmax_param.engine == 2:  # CUDNN
            fw = _SingleArgumentFunction(functions.softmax, use_cudnn=True)

        self.forwards[layer.name] = fw
        self._add_layer(layer)

    @_layer('SoftmaxWithLoss', 'SOFTMAX_LOSS')
    def _setup_softmax_with_loss(self, layer):
        if layer.softmax_param.axis != 1:
            raise RuntimeError(
                'Softmax along non-channel axis is not supported')

        self.forwards[layer.name] = functions.softmax_cross_entropy
        self._add_layer(layer)

    @_layer('Split', 'SPLIT')
    def _setup_split(self, layer):
        for top in layer.top:
            self.split_map[top] = layer.bottom[0]
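
    # Note on 'Split' layers: they are not emulated at all.  ``_setup_split``
    # only records a redirection in ``split_map``, and ``_add_layer`` uses it
    # to make downstream layers read the original bottom blob directly.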

# Internal functions

def _get_ksize(param):
    if param.kernel_h > 0:
        return param.kernel_h, param.kernel_w
    elif type(param.kernel_size) == int:
        return param.kernel_size
    elif len(param.kernel_size) == 1:
        return param.kernel_size[0]
    else:
        return param.kernel_size


def _get_stride(param):
    if param.stride_h > 0:
        return param.stride_h, param.stride_w
    elif type(param.stride) == int:
        return param.stride
    elif len(param.stride) == 0:
        return 1
    elif len(param.stride) == 1:
        return param.stride[0]
    else:
        return param.stride


def _get_pad(param):
    if param.pad_h > 0:
        return param.pad_h, param.pad_w
    elif type(param.pad) == int:
        return param.pad
    elif len(param.pad) == 0:
        return 0
    elif len(param.pad) == 1:
        return param.pad[0]
    else:
        return param.pad


def _get_num(blob):
    if blob.num > 0:
        return blob.num
    else:
        return blob.shape.dim[0]


def _get_channels(blob):
    if blob.channels > 0:
        return blob.channels
    else:
        return blob.shape.dim[1]


def _get_height(blob):
    if blob.height > 0:
        return blob.height
    elif len(blob.shape.dim) == 2:
        return blob.shape.dim[0]
    elif len(blob.shape.dim) == 4:
        return blob.shape.dim[2]
    else:
        raise RuntimeError(
            '{}-dimensional array is not supported'.format(
                len(blob.shape.dim)))


def _get_width(blob):
    if blob.width > 0:
        return blob.width
    elif len(blob.shape.dim) == 2:
        return blob.shape.dim[1]
    elif len(blob.shape.dim) == 4:
        return blob.shape.dim[3]
    else:
        raise RuntimeError(
            '{}-dimensional array is not supported'.format(
                len(blob.shape.dim)))


# Internal classes

class _SingleArgumentFunction(object):

    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __call__(self, x):
        return self.func(x, *self.args, **self.kwargs)


class _ListArgumentFunction(object):

    def __init__(self, func, **kwargs):
        self.func = func
        self.kwargs = kwargs

    def __call__(self, *xs):
        return self.func(xs, **self.kwargs)


class _DropoutFunction(object):

    def __init__(self, caffe_func, ratio):
        # `caffe_func.train` is determined when calling `__call__`.
        self.caffe_func = caffe_func
        self.ratio = ratio

    def __call__(self, x):
        return functions.dropout(
            x, ratio=self.ratio, train=self.caffe_func.train)


class _CallChildLink(object):

    def __init__(self, caffe_func, name):
        self.name = name
        self.caffe_func = caffe_func

    def __call__(self, *xs, **kwargs):
        return self.caffe_func[self.name](*xs, **kwargs)


class _EltwiseFunction(object):

    def __init__(self, operation, coeffs=None):
        if coeffs is not None:
            assert len(coeffs) > 0
        self.operation = operation
        self.coeffs = coeffs

    def __call__(self, *xs):
        operation = self.operation

        if operation == 0:    # PROD
            return six.moves.reduce(lambda x, y: x * y, xs),

        elif operation == 1:  # SUM
            coeffs = self.coeffs
            if coeffs is not None:
                assert len(xs) == len(coeffs)
                xs = [x * coeff for x, coeff in zip(xs, coeffs)]
            return six.moves.reduce(lambda x, y: x + y, xs),

        elif operation == 2:  # MAX
            return six.moves.reduce(
                lambda x, y: functions.maximum(x, y), xs),

        else:
            raise ValueError('Invalid EltwiseParameter.EltwiseOp value.')
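
# A minimal sketch of how the argument-wrapper classes above behave (the
# variables used here are illustrative, not part of this module):
#
#     pool = _SingleArgumentFunction(functions.max_pooling_2d, 3, stride=2)
#     pool(x)      # same as functions.max_pooling_2d(x, 3, stride=2)
#
#     cat = _ListArgumentFunction(functions.concat, axis=1)
#     cat(x1, x2)  # same as functions.concat((x1, x2), axis=1)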