import math
from chainer import cuda
from chainer.functions.connection import linear
from chainer import initializers
from chainer import link


class Linear(link.Link):
"""Linear layer (a.k.a. fully-connected layer).
This is a link that wraps the :func:`~chainer.functions.linear` function,
and holds a weight matrix ``W`` and optionally a bias vector ``b`` as
parameters.
The weight matrix ``W`` is initialized with i.i.d. Gaussian samples, each
of which has zero mean and deviation :math:`\\sqrt{1/\\text{in_size}}`. The
bias vector ``b`` is of size ``out_size``. Each element is initialized with
the ``bias`` value. If ``nobias`` argument is set to True, then this link
does not hold a bias vector.
Args:
in_size (int or None): Dimension of input vectors. If ``None``,
parameter initialization will be deferred until the first forward
data pass at which time the size will be determined.
out_size (int): Dimension of output vectors.
wscale (float): Scaling factor of the weight matrix.
bias (float): Initial bias value.
nobias (bool): If ``True``, then this function does not use the bias.
initialW (2-D array): Initial weight value. If ``None``, then this
function uses Gaussian distribution scaled by ``w_scale`` to
initialize weight.
May also be a callable that takes ``numpy.ndarray`` or
``cupy.ndarray`` and edits its value.
initial_bias (1-D array): Initial bias value. If ``None``, then this
function uses ``bias`` to initialize bias.
May also be a callable that takes ``numpy.ndarray`` or
``cupy.ndarray`` and edits its value.
.. seealso:: :func:`~chainer.functions.linear`
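
    .. admonition:: Example

        A minimal usage sketch (illustrative only; it assumes a NumPy
        ``float32`` input batch of shape ``(batch_size, in_size)``)::

            >>> import numpy as np
            >>> x = np.ones((4, 3), dtype=np.float32)
            >>> l = Linear(3, 2)
            >>> y = l(x)
            >>> y.data.shape
            (4, 2)

        Passing ``in_size=None`` defers the creation of ``W`` until the
        first forward pass::

            >>> l = Linear(None, 2)
            >>> l(x).data.shape
            (4, 2)
            >>> l.W.data.shape
            (2, 3)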

    Attributes:
        W (~chainer.Variable): Weight parameter.
        b (~chainer.Variable): Bias parameter.

    """

    def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
                 initialW=None, initial_bias=None):
        super(Linear, self).__init__()

        # For backward compatibility
        self.initialW = initialW
        self.wscale = wscale

        self.out_size = out_size
        # For backward compatibility, the scale of weights is proportional
        # to the square root of wscale.
        self._W_initializer = initializers._get_initializer(
            initialW, math.sqrt(wscale))

        if in_size is None:
            self.add_uninitialized_param('W')
        else:
            self._initialize_params(in_size)

        if nobias:
            self.b = None
        else:
            if initial_bias is None:
                initial_bias = bias
            bias_initializer = initializers._get_initializer(initial_bias)
            self.add_param('b', out_size, initializer=bias_initializer)

    def _initialize_params(self, in_size):
        self.add_param('W', (self.out_size, in_size),
                       initializer=self._W_initializer)

    def __call__(self, x):
        """Applies the linear layer.

        Args:
            x (~chainer.Variable): Batch of input vectors.

        Returns:
            ~chainer.Variable: Output of the linear layer.
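
        .. note::

            If ``x`` has more than two dimensions, it is treated as a
            batch of flattened vectors of size ``x.size // x.shape[0]``;
            this is also the product used below to infer ``in_size`` when
            parameter initialization was deferred.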
"""
        if self.has_uninitialized_params:
            # in_size was None at construction time; infer it from the
            # flattened input and create W on the current device.
            with cuda.get_device_from_id(self._device_id):
                self._initialize_params(x.size // x.shape[0])
        return linear.linear(x, self.W, self.b)
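

if __name__ == '__main__':
    # Minimal smoke test: a sketch, not part of the original module. It
    # assumes NumPy is available and exercises both the eager and the
    # deferred (in_size=None) initialization paths on CPU.
    import numpy as np

    x = np.ones((4, 3), dtype=np.float32)

    layer = Linear(3, 2)
    assert layer(x).data.shape == (4, 2)

    lazy = Linear(None, 2)           # W is created on the first call
    assert lazy(x).data.shape == (4, 2)
    assert lazy.W.data.shape == (2, 3)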