Source code for archai.common.cocob

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
The code below is 
directly from https://raw.githubusercontent.com/IssamLaradji/sls/master/others/cocob.py
Two coin betting optimization algorithms are implemented here :
Cocob Backprop: https://arxiv.org/pdf/1705.07795.pdf
Cocob through Ons: https://arxiv.org/pdf/1705.07795.pdf
both of which do not require any learning rates and yet
have optimal convergence gauarantees for non-smooth
convex functions.

Cocob-Ons is an experimental variation from paper.
Please don't use it yet.

Please check http://francesco.orabona.com/papers/slides_cocob.pdf for
simple explanation for going from coin betting game to convex optimization.
Both algorithms are similar except the coin betting strategy used.
"""

import torch
from torch import optim
import math

class CocobBackprop(optim.Optimizer):
    """Implements Cocob-Backprop.

    It has been proposed in `Training Deep Networks without Learning Rates
    Through Coin Betting`__.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        alpha (float, optional): positive number to adjust betting fraction.
            Theoretical convergence guarantee does not depend on the choice of
            alpha (default: 100.0)

    __ https://arxiv.org/pdf/1705.07795.pdf
    """

    def __init__(self, params, alpha=100.0, eps=1e-8):
        self.alpha = alpha
        self.eps = eps
        defaults = dict(alpha=alpha, eps=eps)
        super(CocobBackprop, self).__init__(params, defaults)
    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for param in group['params']:
                if param.grad is None:
                    continue

                grad = param.grad.data
                state = self.state[param]
                param_shape = param.shape

                # The bettor bets on the -ve gradient
                neg_grad = - grad

                if len(state) == 0:
                    # Happens only once at the beginning of optimization
                    # Set initial parameter weights and zero reward
                    state['initial_weight'] = param.data
                    state['reward'] = param.new_zeros(param_shape)

                    # Don't bet anything for the first round
                    state['bet'] = param.new_zeros(param_shape)

                    # Initialize internal states used for computing the betting fraction
                    state['neg_grads_sum'] = param.new_zeros(param_shape)
                    state['grads_abs_sum'] = param.new_zeros(param_shape)
                    state['max_observed_scale'] = self.eps * param.new_ones(param_shape)

                # Load states into variables
                initial_weight = state['initial_weight']
                reward = state['reward']
                bet = state['bet']
                neg_grads_sum = state['neg_grads_sum']
                grads_abs_sum = state['grads_abs_sum']
                max_observed_scale = state['max_observed_scale']

                # Update internal states used for computing the betting fraction
                max_observed_scale = torch.max(max_observed_scale, torch.abs(grad))
                grads_abs_sum += torch.abs(grad)
                neg_grads_sum += neg_grad

                # Based on how much the bettor bet on the -ve gradient prediction,
                # check how much the bettor won (-ve if lost)
                win_amount = bet * neg_grad

                # Update the bettor's reward. Negative reward is not allowed.
                reward = torch.max(reward + win_amount, torch.zeros_like(reward))

                # The bettor decides the bet fraction based on the observations so far
                bet_fraction = neg_grads_sum / (max_observed_scale *
                                                (torch.max(grads_abs_sum + max_observed_scale,
                                                           self.alpha * max_observed_scale)))

                # The bettor makes the bet according to the decided betting fraction.
                bet = bet_fraction * (max_observed_scale + reward)

                # Set parameter weights
                param.data = initial_weight + bet

                # Save state back in memory
                state['neg_grads_sum'] = neg_grads_sum
                state['grads_abs_sum'] = grads_abs_sum
                state['max_observed_scale'] = max_observed_scale
                state['reward'] = reward
                state['bet'] = bet
                # For Cocob-Backprop, bet_fraction need not be maintained in state.
                # It is only kept for visualization.
                state['bet_fraction'] = bet_fraction

        return loss

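# Per-coordinate summary of the update performed by CocobBackprop.step() above.
# Names follow the state keys in the code rather than the paper's exact symbols:
#
#   L_t     = max_observed_scale = max(eps, max_{i<=t} |g_i|)
#   G_t     = grads_abs_sum      = sum_{i<=t} |g_i|
#   theta_t = neg_grads_sum      = -sum_{i<=t} g_i
#   R_t     = reward             = max(R_{t-1} - g_t * bet_{t-1}, 0)
#
#   bet_t = theta_t / (L_t * max(G_t + L_t, alpha * L_t)) * (L_t + R_t)
#   w_t   = w_1 + bet_t
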
class CocobOns(optim.Optimizer):
    """Implements Coin-Betting through ONS.

    It has been proposed in `Black-Box Reductions for Parameter-free Online
    Learning in Banach Spaces`__. Cocob-Ons is an experimental variation from
    the paper. Do not use it yet.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        eps (float, optional): positive initial wealth for the betting algorithm.
            Theoretical convergence guarantee does not depend on the choice of
            eps (default: 1e-8)

    __ https://arxiv.org/pdf/1705.07795.pdf
    """

    def __init__(self, params, eps=1e-8):
        self.eps = eps
        defaults = dict(eps=eps)
        super(CocobOns, self).__init__(params, defaults)
    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for param in group['params']:
                if param.grad is None:
                    continue

                grad = param.grad.data
                state = self.state[param]
                param_shape = param.data.shape

                # Clip gradients to be in (-1, 1)
                grad.clamp_(-1.0, 1.0)

                # The bettor bets on the -ve gradient
                neg_grad = - grad

                if len(state) == 0:
                    # Happens only once at the beginning of optimization
                    # Set initial parameter weights and initial wealth
                    state['initial_weight'] = param.data
                    state['wealth'] = self.eps * param.new_ones(param_shape)

                    # Don't bet anything for the first round
                    state['bet_fraction'] = param.new_zeros(param_shape)
                    state['bet'] = param.new_zeros(param_shape)

                    # Initialize internal states used for computing the betting fraction
                    state['z_square_sum'] = param.new_zeros(param_shape)

                # Load states into variables
                wealth = state['wealth']
                bet_fraction = state['bet_fraction']
                z_square_sum = state['z_square_sum']
                initial_weight = state['initial_weight']
                bet = state['bet']

                # Based on how much the bettor bet on the -ve gradient prediction,
                # check how much the bettor won (-ve if lost)
                win_amount = bet * neg_grad

                # Update the bettor's wealth based on what was won / lost.
                wealth = wealth + win_amount

                # The bettor decides the bet fraction based on the observations so far
                # (z, A variable notations from Algo 1 in the paper)
                z = grad / (1 - (bet_fraction * grad))
                z_square_sum = z_square_sum + (z * z)
                A = 1 + z_square_sum
                bet_fraction = bet_fraction - (2 / (2 - math.log(3))) * (z / A)
                bet_fraction.clamp_(-0.5, 0.5)

                # The bettor makes the bet according to the decided betting fraction.
                bet = bet_fraction * wealth

                # Set parameter weights
                param.data = initial_weight + bet

                # Save state back in memory
                state['bet_fraction'] = bet_fraction
                state['wealth'] = wealth
                state['z_square_sum'] = z_square_sum
                state['bet'] = bet

        return loss
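
if __name__ == '__main__':
    # Minimal usage sketch. The toy least-squares problem below is an assumed
    # example, not from the original paper or repository. CocobBackprop drops
    # into a standard PyTorch training loop like torch.optim.SGD, except that
    # no learning rate is passed.
    torch.manual_seed(0)
    X = torch.randn(64, 3)
    true_w = torch.tensor([1.0, -2.0, 0.5])
    y = X @ true_w

    w = torch.zeros(3, requires_grad=True)
    optimizer = CocobBackprop([w])  # note: no learning rate argument

    for _ in range(200):
        optimizer.zero_grad()
        loss = ((X @ w - y) ** 2).mean()
        loss.backward()
        optimizer.step()

    print('final loss:', loss.item())
    print('recovered weights:', w.data)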