Source code for chainer.functions.loss.hinge

import numpy

from chainer import cuda
from chainer import function
from chainer.utils import type_check


def _hinge_fwd_kernel():
    return cuda.elementwise(
        'S t', 'raw T bottom_diff',
        'int ind[] = {i, t}; bottom_diff[ind] *= -1',
        'hinge_fwd')
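

# For reference, a minimal NumPy sketch of what the kernel above computes
# (an illustrative helper, not part of the original module): for each row
# ``i``, flip the sign of the entry at the target class ``t[i]``.
#
#     def _hinge_fwd_numpy(t, bottom_diff):
#         bottom_diff[numpy.arange(len(t)), t] *= -1
#         return bottom_diff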


class Hinge(function.Function):

    """Hinge loss."""

    def __init__(self, norm='L1', reduce='mean'):
        if norm in ['L1', 'L2']:
            self.norm = norm
        else:
            raise NotImplementedError("norm should be either 'L1' or 'L2'")

        if reduce in ['mean', 'no']:
            self.reduce = reduce
        else:
            raise ValueError(
                "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
                'given' % reduce)

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 2)

        x_type, t_type = in_types
        type_check.expect(
            x_type.dtype == numpy.float32,
            t_type.dtype == numpy.int32,
            x_type.ndim == 2,
            t_type.ndim == 1,
            x_type.shape[0] == t_type.shape[0],
        )

    def forward_cpu(self, inputs):
        x, t = inputs
        num = len(x)
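        # Flip the sign of each row's target-class score, then shift and
        # clamp: bottom_diff[n, k] becomes max(0, 1 - delta{t_n = k} * x[n, k]),
        # the elementwise hinge margin described in the docstring of hinge().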
        self.bottom_diff = numpy.copy(x)
        self.bottom_diff[numpy.arange(num), t] *= -1
        self.bottom_diff = numpy.maximum(0, 1 + self.bottom_diff)

        if self.norm == 'L1':
            loss = self.bottom_diff
        elif self.norm == 'L2':
            loss = self.bottom_diff ** 2
        else:
            raise NotImplementedError()

        if self.reduce == 'mean':
            loss = loss.sum() / num

        return numpy.array(loss, dtype=x.dtype),

    def forward_gpu(self, inputs):
        x, t = inputs
        num = x.dtype.type(len(x))
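        # The elementwise kernel flips the sign of x[i, t[i]] for each row i
        # on the GPU, mirroring the fancy-indexing step in forward_cpu.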
        self.bottom_diff = cuda.cupy.maximum(
            0, 1 + _hinge_fwd_kernel()(t, x.copy()))
        if self.norm == 'L1':
            loss = self.bottom_diff
        elif self.norm == 'L2':
            loss = self.bottom_diff ** 2
        else:
            raise NotImplementedError()

        if self.reduce == 'mean':
            loss = loss.sum() / num

        return loss,

    def backward_cpu(self, inputs, grad_outputs):
        t, gloss = inputs[1], grad_outputs[0]

        if self.reduce == 'mean':
            # Divide out of place so the upstream gradient array passed in
            # grad_outputs is not mutated.
            gloss = gloss / len(t)

        self.bottom_diff[numpy.arange(len(t)), t] *= -1
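        # The flip above restores -delta at the target indices, so
        # sign(bottom_diff) equals -delta * [margin > 0], the (sub)gradient
        # direction of max(0, 1 - delta * x) with respect to x.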
        if self.norm == 'L1':
            gx = gloss * numpy.sign(self.bottom_diff)
        elif self.norm == 'L2':
            gx = 2 * gloss * self.bottom_diff
        else:
            raise NotImplementedError()

        return gx, None

    def backward_gpu(self, inputs, grad_outputs):
        xp = cuda.get_array_module(*inputs)
        t, gloss = inputs[1], grad_outputs[0]

        if self.reduce == 'mean':
            # Divide out of place so the upstream gradient array passed in
            # grad_outputs is not mutated.
            gloss = gloss / len(t)

        self.bottom_diff = _hinge_fwd_kernel()(t, self.bottom_diff)
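        # The call above reuses the forward kernel to flip the sign back at
        # the target indices, matching the fancy-indexing step in
        # backward_cpu.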
        if self.norm == 'L1':
            gx = gloss * xp.sign(self.bottom_diff)
        elif self.norm == 'L2':
            gx = 2 * gloss * self.bottom_diff
        else:
            raise NotImplementedError()

        return gx, None


def hinge(x, t, norm='L1', reduce='mean'):
    """Computes the hinge loss for a one-of-many classification task.

    .. math::
        L = \\frac{1}{N} \\sum_{n=1}^N \\sum_{k=1}^K \\left[
        \\max(0, 1 - \\delta\\{t_n = k\\} x_{nk}) \\right]^p

    where :math:`N` denotes the batch size and :math:`K` is the number of
    classes of interest,

    .. math::
        \\delta \\{ {\\rm condition} \\} = \\left \\{ \\begin{array}{cc}
        1 & {\\rm if~condition\\ is\\ true} \\\\
        -1 & {\\rm otherwise,}
        \\end{array} \\right.

    and

    .. math::
        p = \\left \\{ \\begin{array}{cc}
        1 & {\\rm if~norm} = {\\rm L1} \\\\
        2 & {\\rm if~norm} = {\\rm L2.}
        \\end{array} \\right.

    The output is a variable whose value depends on the value of the option
    ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If
    it is ``'mean'``, it takes the mean of loss values.

    Args:
        x (~chainer.Variable): Input variable. The shape of ``x`` should be
            (:math:`N`, :math:`K`).
        t (~chainer.Variable): The :math:`N`-dimensional label vector with
            values :math:`t_n \\in \\{0, 1, 2, \\dots, K-1\\}`. The shape of
            ``t`` should be (:math:`N`,).
        norm (str): Specifies norm type. Either ``'L1'`` or ``'L2'`` is
            acceptable.
        reduce (str): Reduction option. Its value must be either ``'mean'``
            or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable: A variable object holding the hinge loss
        :math:`L`. If ``reduce`` is ``'mean'``, the output variable holds a
        scalar value. If it is ``'no'``, it holds an array whose shape is
        the same as that of ``x``, i.e. (:math:`N`, :math:`K`).

    """
    return Hinge(norm, reduce)(x, t)
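

# A minimal usage sketch, assuming a working Chainer installation
# (illustrative, not part of the original module):
#
#     import numpy as np
#     import chainer
#
#     x = chainer.Variable(np.array([[2.0, -0.5, 0.1]], dtype=np.float32))
#     t = chainer.Variable(np.array([0], dtype=np.int32))
#
#     # Row 0, target class 0: the margins are max(0, 1 - 2.0) = 0,
#     # max(0, 1 + (-0.5)) = 0.5, and max(0, 1 + 0.1) = 1.1, so the mean
#     # L1 loss over this batch of one is 0 + 0.5 + 1.1 = 1.6.
#     loss = hinge(x, t, norm='L1', reduce='mean')   # scalar, 1.6
#     losses = hinge(x, t, reduce='no')              # shape (1, 3), same as x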