try:
    # The abstract base classes moved to collections.abc in Python 3.3.
    import collections.abc as collections_abc
except ImportError:  # Python 2
    import collections as collections_abc
import pkg_resources
import sys
import warnings
import numpy
import six
from chainer import functions
from chainer import link
from chainer import links
def _protobuf3():
ws = pkg_resources.WorkingSet()
try:
ws.require('protobuf>=3.0.0a')
return True
except pkg_resources.VersionConflict:
return False
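
# Choose the caffe_pb2 module generated for the available protobuf version:
# the protobuf3 bindings when protobuf>=3 is installed, otherwise the
# protobuf2 bindings on Python 2. When neither combination works (Python 3
# with protobuf 2), CaffeFunction is marked unavailable and its constructor
# raises RuntimeError.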
if _protobuf3():
from chainer.links.caffe.protobuf3 import caffe_pb2 as caffe_pb
available = True
try:
        # This method is undocumented, but is required to read large model
        # files when the C++ protobuf implementation is used.
from google.protobuf.pyext import _message
_message.SetAllowOversizeProtos(True)
except ImportError:
pass
elif sys.version_info < (3, 0, 0):
# caffe_pb2 does not support Py3
from chainer.links.caffe.protobuf2 import caffe_pb2 as caffe_pb
available = True
else:
available = False
if available:
_type_to_method = {}
_oldname_to_method = {}
def _layer(typ, oldname):
def decorator(meth):
_type_to_method[typ] = meth
if oldname is not None:
typevalue = getattr(caffe_pb.V1LayerParameter, oldname)
_oldname_to_method[typevalue] = meth
return meth
return decorator
else:
def _layer(typ, oldname): # fallback
def decorator(meth):
return meth
return decorator
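
# Note: _layer registers a setup method under both the new-style string type
# and the old V1LayerParameter enum name, e.g. @_layer('ReLU', 'RELU') below.
# Layers that exist only in the new format pass oldname=None.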
class CaffeFunction(link.Chain):
"""Caffe emulator based on the model file of Caffe.
Given a protocol buffers file of a Caffe model, this class loads and
emulates it on :class:`~chainer.Variable` objects. It supports the official
reference models provided by BVLC.
.. note::
protobuf>=3.0.0 is required if you use Python 3 because protobuf 2 is
not supported on Python 3.
.. note::
CaffeFunction ignores the following layers:
- Layers that CaffeFunction does not support (including data layers)
- Layers that have no top blobs
- Layers whose bottom blobs are incomplete (i.e., some or all of them
are not given nor computed)
.. warning::
It does not support full compatibility against Caffe. Some layers and
configurations are not implemented in Chainer yet, though the reference
models provided by the BVLC team are supported except data layers.
.. admonition:: Example
Consider we want to extract the (unnormalized) log class probability
of given images using BVLC reference CaffeNet. The model can be
downloaded from:
http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel
We want to compute the ``fc8`` blob from the ``data`` blob. It is simply
written as follows::
# Load the model
func = CaffeFunction('path/to/bvlc_reference_caffenet.caffemodel')
# Minibatch of size 10
x_data = numpy.ndarray((10, 3, 227, 227), dtype=numpy.float32)
... # (Fill the minibatch here)
# Forward the pre-trained net
x = Variable(x_data)
y, = func(inputs={'data': x}, outputs=['fc8'])
The result ``y`` contains the Variable corresponding to the ``fc8``
blob. The computational graph is memorized as a usual forward
computation in Chainer, so we can run backprop through this pre-trained
net.
Args:
model_path (str): Path to the binary-proto model file of Caffe.
Attributes:
forwards (dict): A mapping from layer names to corresponding functions.
"""
def __init__(self, model_path):
        if not available:
            msg = 'CaffeFunction requires protobuf>=3 on Python 3'
            raise RuntimeError(msg)
super(CaffeFunction, self).__init__()
net = caffe_pb.NetParameter()
with open(model_path, 'rb') as model_file:
net.MergeFromString(model_file.read())
        self.forwards = {}   # layer name -> callable emulating the layer
        self.split_map = {}  # blob alias made by a Split layer -> source blob
        self.layers = []     # (layer name, bottom blob names, top blob names)

        if net.layer:  # new format
for layer in net.layer:
meth = _type_to_method.get(layer.type)
if meth:
meth(self, layer)
else:
                    warnings.warn(
                        'Skip the layer "%s", since CaffeFunction does not '
                        'support the %s layer' % (layer.name, layer.type))
else: # v1 format
for layer in net.layers:
meth = _oldname_to_method.get(layer.type)
if meth:
meth(self, layer)
else:
                    warnings.warn(
                        'Skip the layer "%s", since CaffeFunction does not '
                        'support it' % layer.name)
    def __call__(self, inputs, outputs, disable=(), train=True):
"""Executes a sub-network of the network.
This function acts as an interpreter of the network definition for
Caffe. On execution, it interprets each layer one by one, and if the
bottom blobs are already computed, then emulates the layer and stores
output blobs as :class:`~chainer.Variable` objects.
Args:
inputs (dict): A dictionary whose key-value pairs indicate initial
correspondences between blob names and
:class:`~chainer.Variable` objects.
outputs (Iterable): A list of blob names whose corresponding
:class:`~chainer.Variable` objects are returned.
disable (Iterable): A list of layer names that will be ignored
during the forward computation.
train (bool): If ``True``, this function emulates the TRAIN phase
of the Caffe layers. Otherwise, it emulates the TEST phase.
Returns:
tuple: A tuple of output :class:`~chainer.Variable` objects
corresponding to elements of the `outputs` argument.
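
        .. admonition:: Example

           A minimal sketch, reusing the CaffeNet setup from the class
           docstring (the blob name ``fc6`` is assumed to exist in the
           loaded model)::

              y6, y8 = func(inputs={'data': x}, outputs=['fc6', 'fc8'],
                            train=False)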
"""
self.train = train
variables = dict(inputs)
for func_name, bottom, top in self.layers:
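            # Skip layers that are explicitly disabled, that were not set up
            # (unsupported), or whose bottom blobs are not computed yet.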
if (func_name in disable or
func_name not in self.forwards or
any(blob not in variables for blob in bottom)):
continue
func = self.forwards[func_name]
input_vars = tuple(variables[blob] for blob in bottom)
output_vars = func(*input_vars)
            if not isinstance(output_vars, collections_abc.Iterable):
output_vars = output_vars,
for var, name in zip(output_vars, top):
variables[name] = var
self.variables = variables
return tuple(variables[blob] for blob in outputs)
def _add_layer(self, layer):
bottom = []
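        # Resolve aliases recorded by Split layers so that this layer reads
        # directly from the original source blobs.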
for blob_name in layer.bottom:
bottom.append(self.split_map.get(blob_name, blob_name))
self.layers.append((layer.name, bottom, list(layer.top)))
@_layer('Concat', 'CONCAT')
def _setup_concat(self, layer):
param = layer.concat_param
axis = param.axis
if axis == 1 and param.concat_dim != 1:
axis = param.concat_dim
        self.forwards[layer.name] = _ListArgumentFunction(
            functions.concat, axis=axis)
self._add_layer(layer)
@_layer('Convolution', 'CONVOLUTION')
def _setup_convolution(self, layer):
blobs = layer.blobs
param = layer.convolution_param
ksize = _get_ksize(param)
stride = _get_stride(param)
pad = _get_pad(param)
num = _get_num(blobs[0])
channels = _get_channels(blobs[0])
n_in = channels * param.group
n_out = num
func = links.Convolution2D(n_in, n_out, ksize, stride, pad,
nobias=not param.bias_term)
func.W.data[...] = 0
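        # Caffe stores the filters of a grouped convolution as `group`
        # consecutive chunks; copy each chunk into the corresponding
        # block-diagonal slice of W.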
part_size = len(blobs[0].data) // param.group
for i in six.moves.range(param.group):
in_slice = slice(i * n_in // param.group,
(i + 1) * n_in // param.group)
out_slice = slice(i * n_out // param.group,
(i + 1) * n_out // param.group)
w = func.W.data[out_slice, in_slice]
data = numpy.array(
blobs[0].data[i * part_size:(i + 1) * part_size])
w[:] = data.reshape(w.shape)
if param.bias_term:
func.b.data[:] = blobs[1].data
self.add_link(layer.name, func)
self.forwards[layer.name] = _CallChildLink(self, layer.name)
self._add_layer(layer)
@_layer('Data', 'DATA')
def _setup_data(self, layer):
# We silently skip the data layer.
pass
@_layer('Dropout', 'DROPOUT')
def _setup_dropout(self, layer):
param = layer.dropout_param
self.forwards[layer.name] = _DropoutFunction(
self, ratio=param.dropout_ratio)
self._add_layer(layer)
@_layer('InnerProduct', 'INNER_PRODUCT')
def _setup_inner_product(self, layer):
param = layer.inner_product_param
bias_term = param.bias_term
if param.axis != 1:
raise RuntimeError(
'Non-default axis in InnerProduct is not supported')
blobs = layer.blobs
width, height = _get_width(blobs[0]), _get_height(blobs[0])
func = links.Linear(width, height, nobias=not bias_term)
func.W.data.ravel()[:] = blobs[0].data
if bias_term:
func.b.data[:] = blobs[1].data
self.add_link(layer.name, func)
self.forwards[layer.name] = _CallChildLink(self, layer.name)
self._add_layer(layer)
@_layer('LRN', 'LRN')
def _setup_lrn(self, layer):
param = layer.lrn_param
if param.norm_region != param.ACROSS_CHANNELS:
raise RuntimeError('Within-channel LRN is not supported')
fwd = _SingleArgumentFunction(
functions.local_response_normalization,
n=param.local_size, k=param.k,
alpha=param.alpha / param.local_size, beta=param.beta)
self.forwards[layer.name] = fwd
self._add_layer(layer)
@_layer('Pooling', 'POOLING')
def _setup_pooling(self, layer):
param = layer.pooling_param
ksize = _get_ksize(param)
stride = _get_stride(param)
pad = _get_pad(param)
if param.pool == param.MAX:
func = functions.max_pooling_2d
elif param.pool == param.AVE:
func = functions.average_pooling_2d
else:
raise RuntimeError('Stochastic pooling is not supported')
fw = _SingleArgumentFunction(func, ksize, stride=stride, pad=pad)
self.forwards[layer.name] = fw
self._add_layer(layer)
@_layer('ReLU', 'RELU')
def _setup_relu(self, layer):
slope = layer.relu_param.negative_slope
if slope != 0:
fw = _SingleArgumentFunction(functions.leaky_relu, slope=slope)
else:
fw = functions.relu
self.forwards[layer.name] = fw
self._add_layer(layer)
@_layer('BatchNorm', None)
def _setup_batchnorm(self, layer):
# Get layer parameters.
blobs = layer.blobs
param = layer.batch_norm_param
use_global_stats = param.use_global_stats
decay = param.moving_average_fraction
eps = param.eps
size = int(blobs[0].shape.dim[0]) # Get channel dim from mean blob.
# Make BatchNormalization link.
func = links.BatchNormalization(size, decay=decay, eps=eps,
use_gamma=False, use_beta=False)
func.avg_mean.ravel()[:] = blobs[0].data
func.avg_var.ravel()[:] = blobs[1].data
self.add_link(layer.name, func)
# Add layer.
fwd = _SingleArgumentFunction(
_CallChildLink(self, layer.name),
test=use_global_stats, finetune=False)
self.forwards[layer.name] = fwd
self._add_layer(layer)
@_layer('Eltwise', 'ELTWISE')
def _setup_eltwise(self, layer):
# stable_prod_grad parameter is not supported now.
operation = layer.eltwise_param.operation
coeffs = layer.eltwise_param.coeff or None
self.forwards[layer.name] = _EltwiseFunction(operation, coeffs)
self._add_layer(layer)
@_layer('Scale', None)
def _setup_scale(self, layer):
        # The following parameters are not supported yet:
# - negative axis
# - num_axes
# - filler
# - bias_filler
# Get layer parameters.
bottom = layer.bottom
blobs = layer.blobs
axis = layer.scale_param.axis
bias_term = layer.scale_param.bias_term
# Case of only one bottom where W is learnt parameter.
if len(bottom) == 1:
W_shape = blobs[0].shape.dim
func = links.scale.Scale(axis, W_shape, bias_term)
func.W.data.ravel()[:] = blobs[0].data
if bias_term:
func.bias.b.data.ravel()[:] = blobs[1].data
# Case of two bottoms where W is given as a bottom.
else:
shape = blobs[0].shape.dim if bias_term else None
func = links.scale.Scale(
axis, bias_term=bias_term, bias_shape=shape)
if bias_term:
func.bias.b.data.ravel()[:] = blobs[0].data
# Add layer.
self.add_link(layer.name, func)
self.forwards[layer.name] = _CallChildLink(self, layer.name)
self._add_layer(layer)
@_layer('Slice', 'SLICE')
def _setup_slice(self, layer):
if layer.slice_param.HasField('axis'):
axis = layer.slice_param.axis
elif layer.slice_param.HasField('slice_dim'):
axis = layer.slice_param.slice_dim
else:
axis = 1
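        # slice_point gives explicit split positions; otherwise split the
        # axis evenly into as many sections as there are top blobs.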
if layer.slice_param.slice_point:
indices_or_sections = list(layer.slice_param.slice_point)
else:
indices_or_sections = len(list(layer.top))
self.forwards[layer.name] = _SingleArgumentFunction(
functions.split_axis,
indices_or_sections=indices_or_sections,
axis=axis
)
self._add_layer(layer)
@_layer('Softmax', 'SOFTMAX')
def _setup_softmax(self, layer):
if layer.softmax_param.axis != 1:
raise RuntimeError(
'Softmax along non-channel axis is not supported')
if layer.softmax_param.engine == 0: # DEFAULT
fw = functions.softmax
elif layer.softmax_param.engine == 1: # CAFFE
fw = _SingleArgumentFunction(functions.softmax, use_cudnn=False)
elif layer.softmax_param.engine == 2: # CUDNN
fw = _SingleArgumentFunction(functions.softmax, use_cudnn=True)
self.forwards[layer.name] = fw
self._add_layer(layer)
@_layer('SoftmaxWithLoss', 'SOFTMAX_LOSS')
def _setup_softmax_with_loss(self, layer):
if layer.softmax_param.axis != 1:
raise RuntimeError(
'Softmax along non-channel axis is not supported')
self.forwards[layer.name] = functions.softmax_cross_entropy
self._add_layer(layer)
@_layer('Split', 'SPLIT')
def _setup_split(self, layer):
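        # A Split layer merely copies its input, so record an alias from
        # each top blob to the bottom blob instead of adding a function.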
for top in layer.top:
self.split_map[top] = layer.bottom[0]
# Internal functions
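# The accessors below absorb differences between the proto definitions:
# scalar vs. repeated parameter fields, and per-dimension fields
# (e.g. kernel_h/kernel_w) vs. combined ones.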
def _get_ksize(param):
if param.kernel_h > 0:
return param.kernel_h, param.kernel_w
    elif isinstance(param.kernel_size, int):
return param.kernel_size
elif len(param.kernel_size) == 1:
return param.kernel_size[0]
else:
return param.kernel_size
def _get_stride(param):
if param.stride_h > 0:
return param.stride_h, param.stride_w
    elif isinstance(param.stride, int):
return param.stride
elif len(param.stride) == 0:
return 1
elif len(param.stride) == 1:
return param.stride[0]
else:
return param.stride
def _get_pad(param):
if param.pad_h > 0:
return param.pad_h, param.pad_w
    elif isinstance(param.pad, int):
return param.pad
elif len(param.pad) == 0:
return 0
elif len(param.pad) == 1:
return param.pad[0]
else:
return param.pad
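
# Blob accessors: old-style blobs store explicit num/channels/height/width
# fields, while new-style blobs store a shape vector.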
def _get_num(blob):
if blob.num > 0:
return blob.num
else:
return blob.shape.dim[0]
def _get_channels(blob):
if blob.channels > 0:
return blob.channels
else:
return blob.shape.dim[1]
def _get_height(blob):
if blob.height > 0:
return blob.height
elif len(blob.shape.dim) == 2:
return blob.shape.dim[0]
elif len(blob.shape.dim) == 4:
return blob.shape.dim[2]
    else:
        raise RuntimeError(
            '{}-dimensional array is not supported'.format(
                len(blob.shape.dim)))
def _get_width(blob):
if blob.width > 0:
return blob.width
elif len(blob.shape.dim) == 2:
return blob.shape.dim[1]
elif len(blob.shape.dim) == 4:
return blob.shape.dim[3]
    else:
        raise RuntimeError(
            '{}-dimensional array is not supported'.format(
                len(blob.shape.dim)))
# Internal classes
class _SingleArgumentFunction(object):
def __init__(self, func, *args, **kwargs):
self.func = func
self.args = args
self.kwargs = kwargs
def __call__(self, x):
return self.func(x, *self.args, **self.kwargs)
class _ListArgumentFunction(object):
def __init__(self, func, **kwargs):
self.func = func
self.kwargs = kwargs
def __call__(self, *xs):
return self.func(xs, **self.kwargs)
class _DropoutFunction(object):
def __init__(self, caffe_func, ratio):
        # `caffe_func.train` is set by CaffeFunction.__call__, so it is
        # read lazily at call time.
self.caffe_func = caffe_func
self.ratio = ratio
def __call__(self, x):
return functions.dropout(
x, ratio=self.ratio, train=self.caffe_func.train)
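
# Calls a child link of the given CaffeFunction, looked up by name at call
# time through the Chain item-access interface.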
class _CallChildLink(object):
def __init__(self, caffe_func, name):
self.name = name
self.caffe_func = caffe_func
def __call__(self, *xs, **kwargs):
return self.caffe_func[self.name](*xs, **kwargs)
class _EltwiseFunction(object):
def __init__(self, operation, coeffs=None):
if coeffs is not None:
assert len(coeffs) > 0
self.operation = operation
self.coeffs = coeffs
def __call__(self, *xs):
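        # Note the trailing commas below: each branch returns a 1-tuple so
        # that the result is always treated as a single top blob.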
operation = self.operation
if operation == 0: # PROD
return six.moves.reduce(lambda x, y: x * y, xs),
elif operation == 1: # SUM
coeffs = self.coeffs
if coeffs is not None:
assert len(xs) == len(coeffs)
xs = [x * coeff for x, coeff in zip(xs, coeffs)]
return six.moves.reduce(lambda x, y: x + y, xs),
elif operation == 2: # MAX
return six.moves.reduce(lambda x, y: functions.maximum(x, y), xs),
else:
raise ValueError('Invalid EltwiseParameter.EltwiseOp value.')