import collections
import copy
import warnings
import numpy
import six
from chainer import cuda
from chainer import initializers
import chainer.serializer
from chainer import variable
def _is_shape(value):
    """Tell whether ``value`` can be interpreted as an array shape.

    ``None``, any object accepted by ``int()``, and any sequence whose items
    are all accepted by ``int()`` are regarded as shapes. Zero-sized
    dimensions (``0`` or ``(0, 3)``) are shapes as well.

    Args:
        value: Candidate object, e.g. ``None``, an int, a tuple of ints or a
            ``(shape, dtype)`` pair.

    Returns:
        bool: ``True`` if ``value`` looks like a shape, ``False`` otherwise.

    """
    try:
        from collections.abc import Sequence  # Python 3.3+
    except ImportError:  # Python 2
        Sequence = collections.Sequence

    if value is None:
        return True

    dims = value if isinstance(value, Sequence) else (value,)
    try:
        for dim in dims:
            # Only convertibility matters, not truthiness: 0 is a valid size.
            int(dim)
    except (TypeError, ValueError):
        # TypeError: nested tuple or dtype object; ValueError: dtype string
        # such as 'f'. Either way this is not a plain shape.
        return False
    return True
def _ensure_shape_dtype(value):
    """Normalize a parameter specification to a ``(shape, dtype)`` pair.

    Args:
        value: Either a bare shape (as judged by ``_is_shape``) or an object
            that is assumed to already be a ``(shape, dtype)`` pair.

    Returns:
        ``(value, 'f')`` when ``value`` is a bare shape; otherwise ``value``
        itself, unchanged.

    """
    if not _is_shape(value):
        # Not a bare shape: assume the caller passed a shape-dtype pair.
        return value
    # A bare shape is paired with the FP32 dtype.
    return value, 'f'
class Link(object):

    """Building block of model definitions.

    Link is a building block of neural network models that support various
    features like handling parameters, defining network fragments,
    serialization, etc.

    Link is the primitive structure for the model definitions. It supports
    management of parameter variables and *persistent values* that should be
    incorporated to serialization. Parameters are variables registered via
    the :meth:`add_param` method, or given to the initializer method.
    Persistent values are arrays, scalars, or any other serializable values
    registered via the :meth:`add_persistent` method.

    .. note::
       Whereas arbitrary serializable objects can be registered as persistent
       values, it is strongly recommended to just register values that should
       be treated as results of learning. A typical example of persistent
       values is ones computed during training and required for testing, e.g.
       running statistics for batch normalization.

    Parameters and persistent values are referred by their names. They can be
    accessed as attributes of the links. Link class itself manages the lists
    of names of parameters and persistent values to distinguish parameters and
    persistent values from other attributes.

    Link can be composed into more complex models. This composition feature is
    supported by child classes like :class:`Chain` and :class:`ChainList`. One
    can create a chain by combining one or more links. See the documents for
    these classes for details.

    As noted above, Link supports the serialization protocol of the
    :class:`~chainer.Serializer` class. **Note that only parameters and
    persistent values are saved and loaded.** Other attributes are considered
    as a part of user program (i.e. a part of network definition). In order to
    construct a link from saved file, other attributes must be identically
    reconstructed by user codes.

    .. admonition:: Example

       This is a simple example of custom link definition. Chainer itself also
       provides many links defined under the :mod:`~chainer.links` module. They
       might serve as examples, too.

       Consider we want to define a simple primitive link that implements a
       fully-connected layer based on the :func:`~functions.linear` function.
       Note that this function takes input units, a weight variable, and a bias
       variable as arguments. Then, the fully-connected layer can be defined as
       follows::

          import chainer
          import chainer.functions as F
          import numpy as np

          class LinearLayer(chainer.Link):

              def __init__(self, n_in, n_out):
                  # Parameters are initialized as a numpy array of given
                  # shape.
                  super(LinearLayer, self).__init__(
                      W=(n_out, n_in),
                      b=(n_out,),
                  )
                  self.W.data[...] = np.random.randn(n_out, n_in)
                  self.b.data.fill(0)

              def __call__(self, x):
                  return F.linear(x, self.W, self.b)

       This example shows that a user can define arbitrary parameters and use
       them in any methods. Links typically implement the ``__call__``
       operator.

    Args:
        params: Names, shapes, and optional dtypes of initial parameters. The
            keywords are used as the parameter names and the corresponding
            values consist either of the shape or a tuple of shape and a dtype
            `(shape, dtype)`. If only the shape is supplied, the default dtype
            will be used.

    Attributes:
        name (str): Name of this link, given by the parent chain (if exists).

    """

    def __init__(self, **params):
        self._params = []
        self._persistent = []
        self._uninitialized_params = {}
        self._cpu = True
        self._device_id = None
        self.name = None

        for name, value in six.iteritems(params):
            shape, dtype = _ensure_shape_dtype(value)
            self.add_param(name, shape, dtype=dtype)

    @property
    def xp(self):
        """Array module for this link.

        Depending on which of CPU/GPU this link is on, this property returns
        :mod:`numpy` or :mod:`cupy`.

        """
        return numpy if self._cpu else cuda.cupy

    def add_param(self, name, shape, dtype=numpy.float32, initializer=None):
        """Registers a parameter to the link.

        The registered parameter is saved and loaded on serialization and
        deserialization, and involved in the optimization. The data and
        gradient of the variable are initialized by NaN arrays.
        If ``initializer`` is not ``None``, the data is initialized by
        ``initializer``.

        If the supplied ``name`` argument corresponds to an uninitialized
        parameter (that is, one that was added with the
        :meth:`add_uninitialized_param` method), ``name`` will be removed
        from the set of uninitialized parameters. In that case the gradient
        follows what was requested while the parameter was still
        uninitialized: ``None`` after :meth:`cleargrads`, a zero array after
        :meth:`zerograds`, and a NaN array otherwise.

        The parameter is set to an attribute of the link with the given name.

        Args:
            name (str): Name of the parameter. This name is also used as the
                attribute name. Any uninitialized parameters with the same
                name will be removed.
            shape (int or tuple of ints): Shape of the parameter array.
            dtype: Data type of the parameter array.
            initializer(chainer.initializer.Initializer): If it is not
                ``None``, the data is initialized with the given initializer.
                Note that in this case ``dtype`` argument is ignored.

        Raises:
            AttributeError: If an attribute called ``name`` already exists.

        """
        d = self.__dict__
        if name in d:
            raise AttributeError(
                'cannot register a new parameter %s: attribute exists'
                % name)
        if initializer is None:
            data = self.xp.full(shape, numpy.nan, dtype=dtype)
        else:
            data = initializers.generate_array(initializer, shape, self.xp)

        # The placeholder (if any) remembers whether the gradients were
        # cleared or zeroed before the parameter got its shape.
        u = self._uninitialized_params.get(name)
        if u is None:
            grad = self.xp.full_like(data, numpy.nan)
        elif u._cleared:
            grad = None
        elif u._zeroed:
            grad = self.xp.zeros_like(data)
        else:
            grad = self.xp.full_like(data, numpy.nan)

        var = variable.Variable(data, volatile='auto', name=name)
        var.grad = grad
        self._params.append(name)
        d[name] = var
        if name in self._uninitialized_params:
            del self._uninitialized_params[name]

    def add_uninitialized_param(self, name):
        """Registers an uninitialized parameter to the link.

        An uninitialized parameter is defined as a parameter that has a name
        but that does not yet have a shape. If the shape of a parameter
        depends on the shape of the inputs to the ``__call__`` operator,
        it can be useful to defer initialization (that is, setting the shape)
        until the first forward call of the link. Such parameters are
        intended to be defined as uninitialized parameters in the initializer
        and then initialized during the first forward call.

        An uninitialized parameter is intended to be registered to a link by
        calling this method in the initializer method. Then, during the
        first forward call, the shape of the parameter will be determined
        from the size of the inputs and the parameter must be initialized by
        calling the :meth:`add_param` method.

        Args:
            name: (str): Name of the uninitialized parameter.

        Raises:
            AttributeError: If ``name`` is already an uninitialized parameter
                or an existing attribute of the link.

        """
        class uninitialized_param(object):

            def __init__(self):
                self._cleared = False
                self._zeroed = False

        d = self.__dict__
        if (name in self._uninitialized_params) or (name in d):
            raise AttributeError(
                'cannot register a new uninitialized parameter %s: exists'
                % name)
        self._uninitialized_params[name] = uninitialized_param()

    @property
    def has_uninitialized_params(self):
        """Check if the link has uninitialized parameters.

        Returns:
            bool: ``True`` if the link has any uninitialized parameters.
            Otherwise returns ``False``.

        """
        return len(self._uninitialized_params) > 0

    def add_persistent(self, name, value):
        """Registers a persistent value to the link.

        The registered value is saved and loaded on serialization and
        deserialization. The value is set to an attribute of the link.

        Args:
            name (str): Name of the persistent value. This name is also used
                for the attribute name.
            value: Value to be registered.

        Raises:
            AttributeError: If an attribute called ``name`` already exists.

        """
        d = self.__dict__
        if name in d:
            raise AttributeError(
                'cannot register a new persistent value %s: attribute exists'
                % name)
        self._persistent.append(name)
        d[name] = value

    def copy(self):
        """Copies the link hierarchy to new one.

        The whole hierarchy rooted by this link is copied. The copy is
        basically shallow, except that the parameter variables are also
        shallowly copied. It means that the parameter variables of copied one
        are different from ones of original link, while they share the data
        arrays. The gradient of every copied parameter variable is reset to
        ``None``.

        The bookkeeping of uninitialized parameters is copied as well, so
        initializing a parameter on the copy does not affect the original
        link (and vice versa).

        The name of the link is reset on the copy, since the copied instance
        does not belong to the original parent chain (even if exists).

        Returns:
            Link: Copied link object.

        """
        ret = copy.copy(self)
        ret._params = list(self._params)
        ret._persistent = list(self._persistent)
        # copy.copy() shares the dict and its placeholders; give the copy its
        # own so add_param/cleargrads on one link do not leak into the other.
        ret._uninitialized_params = dict(
            (name, copy.copy(placeholder))
            for name, placeholder in self._uninitialized_params.items())
        ret.name = None
        d = ret.__dict__
        for name in ret._params:
            d[name] = copy.copy(d[name])
            d[name].grad = None
        return ret

    def to_cpu(self):
        """Copies parameter variables and persistent values to CPU.

        This method does not handle non-registered attributes. If some of such
        attributes must be copied to CPU, the link implementation must
        override this method to do so.

        Returns: self

        """
        if self._cpu:
            return self
        d = self.__dict__
        for name in self._params:
            d[name].to_cpu()
        for name in self._persistent:
            value = d[name]
            if isinstance(value, cuda.ndarray):
                d[name] = value.get()
        self._cpu = True
        self._device_id = None
        return self

    def to_gpu(self, device=None):
        """Copies parameter variables and persistent values to GPU.

        This method does not handle non-registered attributes. If some of such
        attributes must be copied to GPU, the link implementation must
        override this method to do so.

        Args:
            device: Target device specifier. If omitted, the current device is
                used.

        Returns: self

        """
        cuda.check_cuda_available()
        if not self._cpu:
            return self
        d = self.__dict__
        with cuda._get_device(device):
            for name in self._params:
                d[name].to_gpu()
            for name in self._persistent:
                value = d[name]
                if isinstance(value, numpy.ndarray):
                    d[name] = cuda.to_gpu(value)
            self._device_id = cuda.cupy.cuda.get_device_id()
        self._cpu = False
        return self

    def params(self):
        """Returns a generator of all parameters under the link hierarchy.

        Returns:
            A generator object that generates all parameters.

        """
        d = self.__dict__
        for name in self._params:
            yield d[name]

    def namedparams(self):
        """Returns a generator of all (path, param) pairs under the hierarchy.

        Returns:
            A generator object that generates all (path, parameter) pairs. The
            paths are relative from this link.

        """
        d = self.__dict__
        for name in self._params:
            yield '/' + name, d[name]

    def links(self, skipself=False):
        """Returns a generator of all links under the hierarchy.

        Args:
            skipself (bool): If ``True``, then the generator skips this link
                and starts with the first child link.

        Returns:
            A generator object that generates all links.

        """
        if not skipself:
            yield self

    def namedlinks(self, skipself=False):
        """Returns a generator of all (path, link) pairs under the hierarchy.

        Args:
            skipself (bool): If ``True``, then the generator skips this link
                and starts with the first child link.

        Returns:
            A generator object that generates all (path, link) pairs.

        """
        if not skipself:
            yield '/', self

    def children(self):
        """Returns a generator of all child links.

        Returns:
            A generator object that generates all child links.

        """
        # The unreachable ``yield`` makes this function an (empty) generator.
        if 0:
            yield

    def copyparams(self, link):
        """Copies all parameters from given link.

        This method copies data arrays of all parameters in the hierarchy. The
        copy is even done across the host and devices. Note that this method
        does not copy the gradient arrays.

        Uninitialized parameters of this link that exist as attributes of the
        source link are initialized with the shape and dtype of the source
        parameter before the data is copied.

        Args:
            link (Link): Source link object.

        """
        src = link.__dict__
        dst = self.__dict__
        for name in self._params:
            dst[name].copydata(src[name])
        # tuple() here is needed to avoid conflicts with add_param
        for name in tuple(self._uninitialized_params):
            if name in src:
                src_param = src[name]
                self.add_param(name, src_param.shape, src_param.dtype)
                dst[name].copydata(src_param)

    def cleargrads(self):
        """Clears all gradient arrays.

        This method should be called before the backward computation at every
        iteration of the optimization.

        Uninitialized parameters under the hierarchy are flagged so that
        :meth:`add_param` later creates them with no gradient array.

        """
        for param in self.params():
            param.cleargrad()
        for link in self.links():
            for param in link._uninitialized_params.values():
                # The most recent request wins over an earlier zerograds().
                param._cleared = True
                param._zeroed = False

    def zerograds(self):
        """Initializes all gradient arrays by zero.

        This method can be used for the same purpose of cleargrads, but less
        efficient. This method is left for backward compatibility.

        Uninitialized parameters under the hierarchy are flagged so that
        :meth:`add_param` later creates them with a zero gradient array.

        .. deprecated:: v1.15
           Use :meth:`cleargrads` instead.

        """
        warnings.warn(
            'Link.zerograds is deprecated. Use Link.cleargrads instead.',
            DeprecationWarning)
        for param in self.params():
            param.zerograd()
        for link in self.links():
            for param in link._uninitialized_params.values():
                # The most recent request wins over an earlier cleargrads().
                param._zeroed = True
                param._cleared = False

    def addgrads(self, link):
        """Accumulates gradient values from given link.

        This method adds each gradient array of the given link to corresponding
        gradient array of this link. The accumulation is even done across
        host and different devices.

        Args:
            link (Link): Source link object.

        """
        src = link.__dict__
        dst = self.__dict__
        for name in self._params:
            dst[name].addgrad(src[name])

    def serialize(self, serializer):
        """Serializes the link object.

        Parameters and persistent values are passed to ``serializer`` by
        name. Uninitialized parameters cannot be saved; when loading, they
        are created with the shape and dtype of the loaded array.

        Args:
            serializer (~chainer.AbstractSerializer): Serializer object.

        Raises:
            ValueError: If ``serializer`` is a
                :class:`chainer.serializer.Serializer` and the link still has
                uninitialized parameters.

        """
        d = self.__dict__
        for name in self._params:
            serializer(name, d[name].data)
        for name in self._persistent:
            d[name] = serializer(name, d[name])
        if (self.has_uninitialized_params and
                isinstance(serializer, chainer.serializer.Serializer)):
            raise ValueError("uninitialized parameters cannot be serialized")
        # Iterate over a copy because add_param mutates the dict.
        for name in self._uninitialized_params.copy():
            # Note: There should only be uninitialized parameters
            # during deserialization.
            initialized_value = serializer(name, None)
            # Keep the stored dtype instead of silently falling back to FP32
            # (consistent with copyparams).
            self.add_param(name, initialized_value.shape,
                           dtype=initialized_value.dtype)
            uninitialized_value = d[name].data
            if isinstance(uninitialized_value, numpy.ndarray):
                numpy.copyto(uninitialized_value, initialized_value)
            elif isinstance(uninitialized_value, cuda.ndarray):
                uninitialized_value.set(numpy.asarray(initialized_value))
class Chain(Link):

    """Composable link with object-like interface.

    Composability is one of the most important features of neural nets. Neural
    net models consist of many reusable fragments, and each model itself might
    be embedded into a larger learnable system. Chain enables us to write a
    neural net based on composition, without bothering about routine works like
    collecting parameters, serialization, copying the structure with parameters
    shared, etc.

    This class actually provides a way to compose one or more links into one
    structure. A chain can contain one or more *child links*. Child link is a
    link registered to the chain with its own name. The child link is stored to
    an attribute of the chain with the name. User can write a whole model or a
    fragment of neural nets as a child class of Chain.

    Each chain itself is also a link. Therefore, one can combine chains into
    higher-level chains. In this way, links and chains construct a *link
    hierarchy*. Link hierarchy forms a tree structure, where each node is
    identified by the path from the root. The path is represented by a string
    like a file path in UNIX, consisting of names of nodes on the path, joined
    by slashes ``/``.

    .. admonition:: Example

       This is a simple example of custom chain definition. Chainer itself also
       provides some chains defined under the :mod:`~chainer.links` module.
       They might serve as examples, too.

       Consider we want to define a multi-layer perceptron consisting of two
       hidden layers with rectifiers as activation functions. We can use the
       :class:`~chainer.links.Linear` link as a building block::

          import chainer
          import chainer.functions as F
          import chainer.links as L

          class MultiLayerPerceptron(chainer.Chain):

              def __init__(self, n_in, n_hidden, n_out):
                  # Create and register three layers for this MLP
                  super(MultiLayerPerceptron, self).__init__(
                      layer1=L.Linear(n_in, n_hidden),
                      layer2=L.Linear(n_hidden, n_hidden),
                      layer3=L.Linear(n_hidden, n_out),
                  )

              def __call__(self, x):
                  # Forward propagation
                  h1 = F.relu(self.layer1(x))
                  h2 = F.relu(self.layer2(h1))
                  return self.layer3(h2)

       Child links are registered via the initializer method. They also can be
       registered by the :meth:`add_link` method. The forward propagation is
       often implemented as The ``__call__`` operator as the above example,
       though it is not mandatory.

    Args:
        links: Child links. The keywords are used as their names. The names are
            also set to the links.

    """

    def __init__(self, **links):
        super(Chain, self).__init__()
        # Names of the child links, in registration order.
        self._children = []

        for child_name, child in six.iteritems(links):
            self.add_link(child_name, child)

    def __getitem__(self, name):
        """Equivalent to getattr."""
        return getattr(self, name)

    def add_link(self, name, link):
        """Registers a child link to this chain.

        The registered link is saved and loaded on serialization and
        deserialization, and involved in the optimization. The registered link
        is called a child. The child link is set to an attribute of the chain
        with the given name.

        This method also sets the :attr:`~Link.name` attribute of the
        registered link. If the given link already has the name attribute set,
        then it raises an error.

        Args:
            name (str): Name of the child link. This name is also used as the
                attribute name.
            link (Link): The link object to be registered.

        Raises:
            ValueError: If ``link`` already has a name.
            AttributeError: If an attribute called ``name`` already exists.

        """
        registered_as = link.name
        if registered_as is not None:
            raise ValueError(
                'given link is already registered to another chain by name %s'
                % registered_as)
        attrs = self.__dict__
        if name in attrs:
            raise AttributeError(
                'cannot register a new link %s: attribute exists' % name)
        self._children.append(name)
        link.name = name
        attrs[name] = link

    def copy(self):
        """Copies the chain; child links are copied recursively."""
        clone = super(Chain, self).copy()
        clone._children = list(clone._children)
        attrs = clone.__dict__
        for child_name in clone._children:
            # Link.copy resets the name, so restore it on the copied child.
            child_copy = attrs[child_name].copy()
            child_copy.name = child_name
            attrs[child_name] = child_copy
        return clone

    def to_cpu(self):
        """Moves this chain and then every child link to CPU."""
        super(Chain, self).to_cpu()
        attrs = self.__dict__
        for child_name in self._children:
            attrs[child_name].to_cpu()
        return self

    def to_gpu(self, device=None):
        """Moves this chain and then every child link to the GPU."""
        with cuda._get_device(device):
            super(Chain, self).to_gpu()
            attrs = self.__dict__
            for child_name in self._children:
                attrs[child_name].to_gpu()
        return self

    def params(self):
        """Yields own parameters, then the parameters of each child."""
        for own_param in super(Chain, self).params():
            yield own_param
        attrs = self.__dict__
        for child_name in self._children:
            for child_param in attrs[child_name].params():
                yield child_param

    def namedparams(self):
        """Yields (path, param) pairs; child paths get ``/<name>`` prefix."""
        for own_pair in super(Chain, self).namedparams():
            yield own_pair
        attrs = self.__dict__
        for child_name in self._children:
            prefix = '/' + child_name
            for subpath, param in attrs[child_name].namedparams():
                yield prefix + subpath, param

    def links(self, skipself=False):
        """Yields this chain (unless skipped) and all links below it."""
        if not skipself:
            yield self
        attrs = self.__dict__
        for child_name in self._children:
            for descendant in attrs[child_name].links():
                yield descendant

    def namedlinks(self, skipself=False):
        """Yields (path, link) pairs for this chain and all links below."""
        if not skipself:
            yield '/', self
        attrs = self.__dict__
        for child_name in self._children:
            child = attrs[child_name]
            prefix = '/' + child_name
            yield prefix, child
            # The child itself was just yielded, so skip it in the recursion.
            for subpath, descendant in child.namedlinks(True):
                yield prefix + subpath, descendant

    def children(self):
        """Yields the direct child links in registration order."""
        attrs = self.__dict__
        for child_name in self._children:
            yield attrs[child_name]

    def copyparams(self, link):
        """Copies parameters from ``link``, recursing into same-named children."""
        super(Chain, self).copyparams(link)
        theirs = link.__dict__
        ours = self.__dict__
        for child_name in self._children:
            ours[child_name].copyparams(theirs[child_name])

    def addgrads(self, link):
        """Accumulates gradients from ``link``, recursing into children."""
        super(Chain, self).addgrads(link)
        theirs = link.__dict__
        ours = self.__dict__
        for child_name in self._children:
            ours[child_name].addgrads(theirs[child_name])

    def serialize(self, serializer):
        """Serializes this chain, then each child under ``serializer[name]``."""
        super(Chain, self).serialize(serializer)
        attrs = self.__dict__
        for child_name in self._children:
            attrs[child_name].serialize(serializer[child_name])
class ChainList(Link):

    """Composable link with list-like interface.

    This is another example of compositional link. Unlike :class:`Chain`, this
    class can be used like a list of child links. Each child link is indexed by
    a non-negative integer, and it maintains the current number of registered
    child links. The :meth:`add_link` method inserts a new link at the end of
    the list. It is useful to write a chain with arbitrary number of child
    links, e.g. an arbitrarily deep multi-layer perceptron.

    Note that this class does not implement all methods of :class:`list`.

    Args:
        links: Initial child links.

    """

    def __init__(self, *links):
        super(ChainList, self).__init__()
        # Child link objects, in registration order.
        self._children = []

        for child in links:
            self.add_link(child)

    def __getitem__(self, index):
        """Returns the child at given index.

        Args:
            index (int): Index of the child in the list.

        Returns:
            Link: The ``index``-th child link.

        """
        return self._children[index]

    def __iter__(self):
        """Returns an iterator over the child links."""
        return iter(self._children)

    def __len__(self):
        """Returns a number of children."""
        return len(self._children)

    def add_link(self, link):
        """Registers a child link to this chain.

        The registered link is saved and loaded on serialization and
        deserialization, and involved in the optimization. The registered link
        is called a child. The child link is accessible via :meth:`children`
        generator, which returns a generator running through the children in
        registered order.

        This method also sets the :attr:`~Link.name` attribute of the
        registered link. If the given link already has the name attribute set,
        then it raises an error.

        Args:
            link (Link): The link object to be registered.

        Raises:
            ValueError: If ``link`` already has a name.

        """
        registered_as = link.name
        if registered_as is not None:
            raise ValueError(
                'given link is already registered to another chain by name %s'
                % registered_as)
        # The child is named after the position it is about to occupy.
        position = len(self._children)
        link.name = str(position)
        self._children.append(link)

    def copy(self):
        """Copies the list; child links are copied recursively."""
        clone = super(ChainList, self).copy()
        duplicates = []
        for position, child in enumerate(clone._children):
            # Link.copy resets the name, so restore the index-based name.
            duplicate = child.copy()
            duplicate.name = str(position)
            duplicates.append(duplicate)
        clone._children = duplicates
        return clone

    def to_cpu(self):
        """Moves this link and then every child link to CPU."""
        super(ChainList, self).to_cpu()
        for child in self._children:
            child.to_cpu()
        return self

    def to_gpu(self, device=None):
        """Moves this link and then every child link to the GPU."""
        with cuda._get_device(device):
            super(ChainList, self).to_gpu()
            for child in self._children:
                child.to_gpu()
        return self

    def params(self):
        """Yields own parameters, then the parameters of each child."""
        for own_param in super(ChainList, self).params():
            yield own_param
        for child in self._children:
            for child_param in child.params():
                yield child_param

    def namedparams(self):
        """Yields (path, param) pairs; child paths get ``/<index>`` prefix."""
        for own_pair in super(ChainList, self).namedparams():
            yield own_pair
        for position, child in enumerate(self._children):
            prefix = '/%d' % position
            for subpath, param in child.namedparams():
                yield prefix + subpath, param

    def links(self, skipself=False):
        """Yields this link (unless skipped) and all links below it."""
        if not skipself:
            yield self
        for child in self._children:
            for descendant in child.links():
                yield descendant

    def namedlinks(self, skipself=False):
        """Yields (path, link) pairs for this link and all links below."""
        if not skipself:
            yield '/', self
        for position, child in enumerate(self._children):
            prefix = '/%d' % position
            yield prefix, child
            # The child itself was just yielded, so skip it in the recursion.
            for subpath, descendant in child.namedlinks(True):
                yield prefix + subpath, descendant

    def children(self):
        """Yields the direct child links in registration order."""
        for child in self._children:
            yield child

    def copyparams(self, link):
        """Copies parameters from ``link``, pairing children by index."""
        super(ChainList, self).copyparams(link)
        for position, child in enumerate(self._children):
            child.copyparams(link[position])

    def addgrads(self, link):
        """Accumulates gradients from ``link``, pairing children by index."""
        super(ChainList, self).addgrads(link)
        for position, child in enumerate(self._children):
            child.addgrads(link[position])

    def serialize(self, serializer):
        """Serializes this link, then each child under ``serializer[index]``."""
        super(ChainList, self).serialize(serializer)
        for position, child in enumerate(self._children):
            child.serialize(serializer['%d' % position])