Source code for storm_kit.mpc.control.mppi

#!/usr/bin/env python
#
# MIT License
#
# Copyright (c) 2020-2021 NVIDIA CORPORATION.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import copy

import numpy as np
import scipy.special
import torch
from torch.distributions.multivariate_normal import MultivariateNormal
from torch.nn.functional import normalize as f_norm

from .control_utils import cost_to_go, matrix_cholesky, batch_cholesky
from .olgaussian_mpc import OLGaussianMPC

class MPPI(OLGaussianMPC):
    """
    .. inheritance-diagram:: MPPI
       :parts: 1

    Class that implements the Model Predictive Path Integral (MPPI) controller.

    The implementation is based on Williams et al., "Information Theoretic MPC
    for Model-Based Reinforcement Learning", with additional functions for
    updating the covariance matrix and calculating the soft value function.
    """

    def __init__(self,
                 d_action,
                 horizon,
                 init_cov,
                 init_mean,
                 base_action,
                 beta,
                 num_particles,
                 step_size_mean,
                 step_size_cov,
                 alpha,
                 gamma,
                 kappa,
                 n_iters,
                 action_lows,
                 action_highs,
                 null_act_frac=0.,
                 rollout_fn=None,
                 sample_mode='mean',
                 hotstart=True,
                 squash_fn='clamp',
                 update_cov=False,
                 cov_type='sigma_I',
                 seed=0,
                 sample_params={'type': 'halton', 'fixed_samples': True, 'seed': 0, 'filter_coeffs': None},
                 tensor_args={'device': torch.device('cpu'), 'dtype': torch.float32},
                 visual_traj='state_seq'):

        super(MPPI, self).__init__(d_action,
                                   action_lows,
                                   action_highs,
                                   horizon,
                                   init_cov,
                                   init_mean,
                                   base_action,
                                   num_particles,
                                   gamma,
                                   n_iters,
                                   step_size_mean,
                                   step_size_cov,
                                   null_act_frac,
                                   rollout_fn,
                                   sample_mode,
                                   hotstart,
                                   squash_fn,
                                   cov_type,
                                   seed,
                                   sample_params=sample_params,
                                   tensor_args=tensor_args)
        self.beta = beta
        self.alpha = alpha  # 0 means control cost is on, 1 means off
        self.update_cov = update_cov
        self.kappa = kappa
        self.visual_traj = visual_traj
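    # Illustrative construction sketch (not part of the original source). The
    # argument values below are hypothetical, and `my_rollout_fn` stands in for
    # a callable that rolls out sampled action sequences and returns a dict
    # with "costs", "actions", and the key named by `visual_traj` (see
    # `_update_distribution` below):
    #
    #   controller = MPPI(d_action=7, horizon=30,
    #                     init_cov=0.05, init_mean=np.zeros((30, 7)),
    #                     base_action='repeat',  # assumed valid base_action mode
    #                     beta=1.0, num_particles=500,
    #                     step_size_mean=0.9, step_size_cov=0.7,
    #                     alpha=1, gamma=0.99, kappa=0.001, n_iters=1,
    #                     action_lows=-np.ones(7), action_highs=np.ones(7),
    #                     rollout_fn=my_rollout_fn)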
    def _update_distribution(self, trajectories):
        """
        Update the moments of the sampling distribution using sampled
        trajectories.
        """
        costs = trajectories["costs"].to(**self.tensor_args)
        vis_seq = trajectories[self.visual_traj].to(**self.tensor_args)
        actions = trajectories["actions"].to(**self.tensor_args)
        w = self._exp_util(costs, actions)

        # Update best action
        best_idx = torch.argmax(w)
        self.best_idx = best_idx
        self.best_traj = torch.index_select(actions, 0, best_idx).squeeze(0)

        # Cache 10 trajectories for visualization (torch.topk defaults to
        # largest=True, i.e. the highest total costs)
        top_values, top_idx = torch.topk(self.total_costs, 10)
        self.top_values = top_values
        self.top_idx = top_idx
        self.top_trajs = torch.index_select(vis_seq, 0, top_idx).squeeze(0)

        # Weighted average of the sampled action sequences
        weighted_seq = w.T * actions.T
        new_mean = torch.sum(weighted_seq.T, dim=0)

        self.mean_action = (1.0 - self.step_size_mean) * self.mean_action +\
            self.step_size_mean * new_mean

        delta = actions - self.mean_action.unsqueeze(0)

        # Update covariance
        if self.update_cov:
            if self.cov_type == 'sigma_I':
                raise NotImplementedError('Need to implement covariance update of form sigma*I')
            elif self.cov_type == 'diag_AxA':
                # Diagonal covariance of size A x A
                weighted_delta = w * (delta ** 2).T
                cov_update = torch.mean(torch.sum(weighted_delta.T, dim=0), dim=0)
            elif self.cov_type == 'diag_HxH':
                raise NotImplementedError
            elif self.cov_type == 'full_AxA':
                # Full covariance of size A x A
                weighted_delta = torch.sqrt(w) * delta.T
                weighted_delta = weighted_delta.T.reshape((self.horizon * self.num_particles, self.d_action))
                cov_update = torch.matmul(weighted_delta.T, weighted_delta) / self.horizon
            elif self.cov_type == 'full_HAxHA':
                # Full covariance of size HA x HA
                weighted_delta = torch.sqrt(w) * delta.view(delta.shape[0], delta.shape[1] * delta.shape[2]).T
                cov_update = torch.matmul(weighted_delta, weighted_delta.T)
            else:
                raise ValueError('Unidentified covariance type in update_distribution')

            self.cov_action = (1.0 - self.step_size_cov) * self.cov_action +\
                self.step_size_cov * cov_update
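    # The mean update above is the standard MPPI exponentially weighted moving
    # average (notation follows the code; eta = step_size_mean):
    #
    #   mu <- (1 - eta) * mu + eta * sum_i w_i * a_i
    #
    # where a_i are the sampled action sequences and w_i the exponential-utility
    # weights returned by `_exp_util`.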
    def _shift(self, shift_steps):
        """
        Predict good parameters for the next time step by shifting the mean
        forward by shift_steps and growing the covariance.
        """
        if shift_steps == 0:
            return
        super()._shift(shift_steps)
        if self.update_cov:
            if self.cov_type == 'sigma_I':
                self.cov_action += self.kappa
                self.scale_tril = torch.sqrt(self.cov_action)
            elif self.cov_type == 'diag_AxA':
                self.cov_action += self.kappa
                self.scale_tril = torch.sqrt(self.cov_action)
            elif self.cov_type == 'full_AxA':
                self.cov_action += self.kappa * self.I
                self.scale_tril = matrix_cholesky(self.cov_action)
            elif self.cov_type == 'full_HAxHA':
                self.cov_action += self.kappa * self.I
                # Shift the covariance up and to the left by one action block
                # per shifted step
                shift_dim = shift_steps * self.d_action
                I2 = torch.eye(shift_dim, **self.tensor_args)
                self.cov_action = torch.roll(self.cov_action, shifts=(-shift_dim, -shift_dim), dims=(0, 1))
                # Zero the last shift_dim rows and columns
                self.cov_action[-shift_dim:, :].zero_()
                self.cov_action[:, -shift_dim:].zero_()
                # Reset the bottom-right shift_dim x shift_dim block to the
                # initial covariance
                self.cov_action[-shift_dim:, -shift_dim:] = self.init_cov * I2
                # Update the Cholesky decomposition (torch.cholesky is
                # deprecated; newer PyTorch uses torch.linalg.cholesky)
                self.scale_tril = torch.cholesky(self.cov_action)
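    # Illustration of the 'full_HAxHA' covariance shift above for H * A = 4 and
    # shift_dim = 2 (Bij are the surviving covariance blocks, s = init_cov):
    #
    #   [B11 B12 B13 B14]                 [B33 B34  0   0 ]
    #   [B21 B22 B23 B24]   roll + reset  [B43 B44  0   0 ]
    #   [B31 B32 B33 B34]   ----------->  [ 0   0   s   0 ]
    #   [B41 B42 B43 B44]                 [ 0   0   0   s ]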
    def _exp_util(self, costs, actions):
        """
        Calculate weights using exponential utility.
        """
        traj_costs = cost_to_go(costs, self.gamma_seq)
        traj_costs = traj_costs[:, 0]
        #control_costs = self._control_costs(actions)
        total_costs = traj_costs #+ self.beta * control_costs

        # Calculate soft-max weights
        w = torch.softmax((-1.0 / self.beta) * total_costs, dim=0)
        self.total_costs = total_costs
        return w
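    # The weights above are a softmax over negated discounted trajectory costs
    # with temperature beta:
    #
    #   w_i = exp(-S_i / beta) / sum_j exp(-S_j / beta)
    #
    # where S_i = cost_to_go(costs, gamma_seq)[i, 0]. A small beta concentrates
    # weight on the lowest-cost particle; a large beta spreads weight more
    # uniformly across particles.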
    def _control_costs(self, actions):
        if self.alpha == 1:
            # Control costs are disabled
            return torch.zeros(actions.shape[0], **self.tensor_args)
        else:
            delta = actions - self.mean_action.unsqueeze(0)
            u_normalized = self.mean_action.matmul(self.full_inv_cov).unsqueeze(0)
            control_costs = 0.5 * u_normalized * (self.mean_action.unsqueeze(0) + 2.0 * delta)
            control_costs = torch.sum(control_costs, dim=-1)
            control_costs = cost_to_go(control_costs, self.gamma_seq)
            control_costs = control_costs[:, 0]
        return control_costs
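    # Derivation note for the quadratic form above: writing each sampled action
    # as a = mu + delta, the per-timestep term
    #
    #   0.5 * mu^T Sigma^{-1} (mu + 2 * delta)
    #     = 0.5 * (a^T Sigma^{-1} a  -  delta^T Sigma^{-1} delta)
    #
    # i.e. the action's quadratic control cost minus the part contributed by
    # the sampling noise alone, matching the information-theoretic control cost
    # of Williams et al.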
    def _calc_val(self, trajectories):
        costs = trajectories["costs"].to(**self.tensor_args)
        actions = trajectories["actions"].to(**self.tensor_args)
        traj_costs = cost_to_go(costs, self.gamma_seq)[:, 0]
        # _control_costs expects actions and computes delta internally
        control_costs = self._control_costs(actions)
        total_costs = traj_costs + self.beta * control_costs

        # Log-sum-exp with uniform particle weights 1/N, matching the original
        # scipy.special.logsumexp reference with b=1/N (torch.logsumexp
        # requires an explicit dim argument)
        val = -self.beta * (torch.logsumexp((-1.0 / self.beta) * total_costs, dim=0)
                            - np.log(total_costs.shape[0]))
        return val
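# Note on `_calc_val`: the quantity it returns is the free energy / soft value
# of the cost distribution (cf. Williams et al.),
#
#   V = -beta * log( (1/N) * sum_i exp(-S_i / beta) ),
#
# which approaches min_i S_i as beta -> 0 and the mean particle cost as
# beta -> inf.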