# Source code for chainer.optimizers.smorms3

import numpy

from chainer import cuda
from chainer import optimizer


class SMORMS3(optimizer.GradientMethod):

    """Simon Funk's SMORMS3.

    See http://sifter.org/~simon/journal/20150420.html.

    Args:
        lr (float): Upper bound on the per-element step coefficient. The
            update uses ``min(x, lr)`` where ``x`` is the squared running
            mean of the gradient divided by the running mean of its square.
        eps (float): Small constant added to the denominators of the update
            so that they are never exactly zero.

    """

    def __init__(self, lr=0.001, eps=1e-16):
        self.lr = lr
        self.eps = eps

    def init_state(self, param, state):
        """Create the per-parameter optimizer state.

        Three arrays shaped like ``param.data`` are stored in ``state``:

        * ``'mem'``: initialised to ones,
        * ``'g'``: initialised to zeros (running mean of the gradient),
        * ``'g2'``: initialised to zeros (running mean of the squared
          gradient).

        Args:
            param: Parameter whose ``data`` array determines the array
                module and the shape of the state arrays.
            state (dict): Mapping that receives the state arrays.

        """
        xp = cuda.get_array_module(param.data)
        # Allocate inside the device context obtained from param.data so the
        # state arrays are created alongside the parameter array.
        with cuda.get_device_from_array(param.data):
            state['mem'] = xp.ones_like(param.data)
            state['g'] = xp.zeros_like(param.data)
            state['g2'] = xp.zeros_like(param.data)

    def update_one_cpu(self, param, state):
        """Apply one SMORMS3 step to ``param`` using NumPy.

        ``param.data`` is modified in place. The ``'mem'``, ``'g'`` and
        ``'g2'`` entries of ``state`` are rebound to newly computed arrays
        (they are not updated in place).

        Args:
            param: Parameter providing ``data`` and ``grad`` arrays.
            state (dict): State created by :meth:`init_state`.

        """
        mem, g, g2 = state['mem'], state['g'], state['g2']
        grad = param.grad

        # Per-element averaging rate derived from the memory term.
        r = 1 / (mem + 1)
        # Running means of the gradient and of the squared gradient.
        g = (1 - r) * g + r * grad
        g2 = (1 - r) * g2 + r * grad * grad
        # Ratio of squared mean to mean square; it both caps the step
        # (together with lr) and controls how fast the memory decays.
        x = g * g / (g2 + self.eps)
        param.data -= (grad * numpy.minimum(x, self.lr)
                       / (numpy.sqrt(g2) + self.eps))
        mem = 1 + mem * (1 - x)

        state['mem'], state['g'], state['g2'] = mem, g, g2

    def update_one_gpu(self, param, state):
        """Apply one SMORMS3 step to ``param`` with an elementwise kernel.

        The kernel body performs the same arithmetic as
        :meth:`update_one_cpu`; ``param``, ``mem``, ``g`` and ``g2`` are
        assigned inside the kernel, so ``param.data`` and the state arrays
        passed in are the ones written to.

        Args:
            param: Parameter providing ``data`` and ``grad`` arrays.
            state (dict): State created by :meth:`init_state`.

        """
        cuda.elementwise(
            'T grad, T lr, T eps',
            'T param, T mem, T g, T g2',
            '''T r, x;
               r = 1 / (mem + 1);
               g = (1 - r) * g + r * grad;
               g2 = (1 - r) * g2 + r * grad * grad;
               x = g * g / (g2 + eps);
               param -= grad * min(lr, x) / (sqrt(g2) + eps);
               mem = 1 + mem * (1 - x)
               ''',
            'smorms3')(param.grad, self.lr, self.eps, param.data,
                       state['mem'], state['g'], state['g2'])