# Source code for chainer.optimizers.rmsprop_graves

import numpy

from chainer import cuda
from chainer import optimizer


class RMSpropGraves(optimizer.GradientMethod):

    """Alex Graves's RMSprop.

    See https://arxiv.org/abs/1308.0850

    This optimizer keeps exponential moving averages of the gradient
    (``g``) and of the squared gradient (``n``), and scales each update
    by the *centered* second moment ``sqrt(n - g**2 + eps)``. The scaled
    step is accumulated into a classical momentum buffer ``delta``.

    Args:
        lr (float): Learning rate.
        alpha (float): Exponential decay rate of the moving averages
            ``n`` and ``g``.
        momentum (float): Momentum coefficient applied to ``delta``.
        eps (float): Small value added inside the square root for
            numerical stability.

    """

    def __init__(self, lr=1e-4, alpha=0.95, momentum=0.9, eps=1e-4):
        # Default parameter values are the ones in the original paper.
        self.lr = lr
        self.alpha = alpha
        self.eps = eps
        self.momentum = momentum

    def init_state(self, param, state):
        # Allocate per-parameter state arrays on the same device
        # (CPU/GPU) as the parameter itself.
        xp = cuda.get_array_module(param.data)
        with cuda.get_device_from_array(param.data):
            state['n'] = xp.zeros_like(param.data)      # moving avg of grad**2
            state['g'] = xp.zeros_like(param.data)      # moving avg of grad
            state['delta'] = xp.zeros_like(param.data)  # momentum buffer

    def update_one_cpu(self, param, state):
        """Apply one RMSpropGraves update to ``param`` in place (CPU path)."""
        n, g, delta = state['n'], state['g'], state['delta']
        grad = param.grad

        # Update the first and second moment estimates in place.
        n *= self.alpha
        n += (1 - self.alpha) * grad * grad
        g *= self.alpha
        g += (1 - self.alpha) * grad
        # Momentum step scaled by the centered RMS of the gradient.
        delta *= self.momentum
        delta -= self.lr * grad / numpy.sqrt(n - g * g + self.eps)
        param.data += delta

    def update_one_gpu(self, param, state):
        """Apply one RMSpropGraves update to ``param`` in place (GPU path).

        Mirrors :meth:`update_one_cpu` as a single fused elementwise
        kernel; ``rsqrt`` computes ``1 / sqrt(x)`` on the device.
        """
        cuda.elementwise(
            'T grad, T lr, T alpha, T momentum, T eps',
            'T param, T avg_n, T avg_g, T delta',
            '''avg_n = alpha * avg_n + (1 - alpha) * grad * grad;
               avg_g = alpha * avg_g + (1 - alpha) * grad;
               delta = delta * momentum -
                   lr * grad * rsqrt(avg_n - avg_g * avg_g + eps);
               param += delta;''',
            'rmsprop_graves')(
                param.grad, self.lr, self.alpha, self.momentum, self.eps,
                param.data, state['n'], state['g'], state['delta'])