# Implementation of stochastic gradient Langevin dynamics (SGLD).
#
# Reference: Bayesian Learning via Stochastic Gradient Langevin Dynamics 
#            (https://www.ics.uci.edu/~welling/publications/papers/stoclangevin_v6.pdf)

from numpy import sqrt
import torch
from torch.optim import Optimizer


class SGLD(Optimizer):
    """Stochastic gradient Langevin dynamics (SGLD) optimizer.

    Every call to :meth:`step` applies, to each parameter that has a
    gradient, the update::

        param <- param - h * grad + sqrt(2 * h) * eps,    eps ~ N(0, I)

    i.e. a plain gradient step of size ``h`` plus isotropic Gaussian noise
    with standard deviation ``sqrt(2 * h)``.

    Args:
        params: Iterable of parameters to update, or of dicts defining
            parameter groups (forwarded unchanged to ``Optimizer``).
        h: Step size. Must be non-negative. May be overridden per
            parameter group through the ``'h'`` key.

    Raises:
        ValueError: If ``h`` is negative.
    """

    def __init__(self, params, h):
        if h < 0.0:
            raise ValueError("Invalid step size: {}".format(h))
        defaults = dict(h=h)
        super(SGLD, self).__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single SGLD update on every parameter with a gradient.

        Args:
            closure: Optional callable that re-evaluates the model and
                returns the loss. It is invoked with gradient tracking
                enabled, before the parameters are updated.

        Returns:
            The value returned by ``closure``, or ``None`` when no closure
            was supplied.
        """
        loss = None
        if closure is not None:
            # step() itself runs under no_grad, so gradient tracking has to
            # be re-enabled for a closure that calls backward().
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            h = group['h']
            # Loop-invariant within a group; float() turns the numpy scalar
            # returned by sqrt into a plain Python number.
            noise_scale = float(sqrt(2 * h))
            for param in group['params']:
                if param.grad is None:
                    # Parameter took no part in the last backward pass.
                    continue
                # randn_like already matches the parameter's shape, dtype
                # and device, so no further wrapping is needed.
                noise = torch.randn_like(param)
                param.add_(-h * param.grad + noise_scale * noise)

        return loss