Source code for chainer.functions.loss.contrastive

import numpy

from chainer import cuda
from chainer import function
from chainer.utils import type_check


class Contrastive(function.Function):

    """Contrastive loss function.

    For every row ``n`` of the inputs, with
    ``d_n = ||x0[n] - x1[n]||_2``, the per-pair loss is::

        L_n = 0.5 * (y_n * d_n ** 2
                     + (1 - y_n) * max(margin - d_n, 0) ** 2)

    With ``reduce='mean'`` the per-pair losses are summed and divided by
    the number of rows; with ``reduce='no'`` they are returned as they are.

    Args:
        margin: Margin of the loss. Must be strictly positive.
        reduce (str): Either ``'mean'`` or ``'no'``.

    Raises:
        ValueError: If ``margin`` is not positive or ``reduce`` is not one
            of the two accepted values.

    """

    def __init__(self, margin, reduce='mean'):
        if margin <= 0:
            raise ValueError("margin should be positive value.")
        self.margin = margin

        if reduce not in ('mean', 'no'):
            raise ValueError(
                "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
                'given' % reduce)
        self.reduce = reduce

    def check_type_forward(self, in_types):
        """Validates the number, dtypes and shapes of the inputs.

        Exactly three inputs are expected: two float32 matrices of the
        same shape and one int32 vector whose length equals the (non-zero)
        number of rows of the matrices.
        """
        type_check.expect(in_types.size() == 3)

        x0_type, x1_type, y_type = in_types
        type_check.expect(
            x0_type.dtype == numpy.float32,
            x1_type.dtype == numpy.float32,
            y_type.dtype == numpy.int32,
            x0_type.shape == x1_type.shape,
            x1_type.shape[0] == y_type.shape[0],
            x1_type.shape[0] > 0,
            x0_type.ndim == 2,
            x1_type.ndim == 2,
            y_type.ndim == 1
        )

    def forward(self, inputs):
        """Computes the loss and caches intermediates for ``backward``.

        Args:
            inputs: Tuple ``(x0, x1, y)`` of arrays.

        Returns:
            A one-element tuple holding a float32 array: a scalar when
            ``reduce`` is ``'mean'``, otherwise one value per row.
        """
        xp = cuda.get_array_module(*inputs)
        x0, x1, y = inputs

        # These intermediates are stored on the instance because backward
        # reuses them instead of recomputing.
        self.diff = x0 - x1
        self.dist_sq = xp.sum(self.diff ** 2, axis=1)
        self.dist = xp.sqrt(self.dist_sq)
        self.mdist = self.margin - self.dist
        # Hinge: only pairs closer than the margin contribute.
        dist = xp.maximum(self.mdist, 0)
        loss = (y * self.dist_sq + (1 - y) * dist * dist) * .5
        if self.reduce == 'mean':
            loss = xp.sum(loss) / x0.shape[0]
        return xp.array(loss, dtype=xp.float32),

    def backward(self, inputs, gy):
        """Computes the gradients with respect to ``x0`` and ``x1``.

        Relies on ``self.diff``, ``self.dist`` and ``self.mdist`` stored by
        the preceding ``forward`` call.

        Args:
            inputs: Tuple ``(x0, x1, y)`` of arrays.
            gy: Tuple whose first element is the gradient of the output.

        Returns:
            Tuple ``(gx0, gx1, None)``; ``gx1`` is ``-gx0`` and no gradient
            is produced for the labels.
        """
        xp = cuda.get_array_module(*inputs)

        # Per-row quantities are reshaped to (N, 1) columns and left to
        # broadcast against the (N, K) difference matrix; this avoids
        # materializing (N, K) copies of them.
        y = inputs[2][:, None]
        if self.reduce == 'mean':
            alpha = gy[0] / y.shape[0]
        else:
            alpha = gy[0][:, None]
        # avoid division by zero
        dist = xp.maximum(self.dist[:, None], 1e-8)
        # similar pair
        gx0 = alpha * y * self.diff
        # dissimilar pair: contributes only where margin - dist is positive
        mdist = self.mdist[:, None]
        mdist_p = xp.array(mdist > 0, dtype=xp.int32)
        gx0 += alpha * (1 - y) * mdist_p * mdist * -(self.diff / dist)
        gx0 = gx0.astype(xp.float32)

        return gx0, -gx0, None


def contrastive(x0, x1, y, margin=1, reduce='mean'):
    """Computes contrastive loss.

    It takes a pair of samples and a label as inputs.
    The label is :math:`1` when those samples are similar,
    or :math:`0` when they are dissimilar.

    Let :math:`N` and :math:`K` denote mini-batch size and the dimension
    of input variables, respectively. The shape of both input variables
    ``x0`` and ``x1`` should be ``(N, K)``.
    The loss value of the :math:`n`-th sample pair :math:`L_n` is

    .. math::

        L_n = \\frac{1}{2} \\left( y_n d_n^2
        + (1 - y_n) \\max ({\\rm margin} - d_n, 0)^2 \\right)

    where :math:`d_n = \\| {\\bf x_0}_n - {\\bf x_1}_n \\|_2`,
    :math:`{\\bf x_0}_n` and :math:`{\\bf x_1}_n` are :math:`n`-th
    K-dimensional vectors of ``x0`` and ``x1``.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'mean'``, this function takes a mean of
    loss values.

    Args:
        x0 (~chainer.Variable): The first input variable. The shape should be
            (N, K), where N denotes the mini-batch size, and K denotes the
            dimension of ``x0``.
        x1 (~chainer.Variable): The second input variable. The shape should be
            the same as ``x0``.
        y (~chainer.Variable): Labels. All values should be 0 or 1. The shape
            should be ``(N,)``, where N denotes the mini-batch size.
        margin (float): A parameter for contrastive loss. It should be positive
            value.
        reduce (str): Reduction option. Its value must be either
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable holding the loss value(s) calculated by the
            above equation.
            If ``reduce`` is ``'no'``, the output variable holds an array
            of shape ``(N,)`` with one loss value per sample pair.
            If it is ``'mean'``, the output variable holds a scalar value.

    .. note::
        This cost can be used to train siamese networks. See `Learning a
        Similarity Metric Discriminatively, with Application to Face
        Verification <http://yann.lecun.com/exdb/publis/pdf/chopra-05.pdf>`_
        for details.

    """
    return Contrastive(margin, reduce)(x0, x1, y)