# Source code for storm_kit.mpc.control.olgaussian_mpc

#
# MIT License
#
# Copyright (c) 2020-2021 NVIDIA CORPORATION.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
"""
MPC with open-loop Gaussian policies
"""
import copy

import numpy as np
import torch
from torch.distributions.multivariate_normal import MultivariateNormal

from .control_base import Controller
from .control_utils import generate_noise, scale_ctrl, generate_gaussian_halton_samples, generate_gaussian_sobol_samples, gaussian_entropy, matrix_cholesky, batch_cholesky, get_stomp_cov
from .sample_libs import StompSampleLib, HaltonSampleLib, RandomSampleLib, HaltonStompSampleLib, MultipleSampleLib

class OLGaussianMPC(Controller):
    """Sampling-based MPC controller with an open-loop Gaussian action distribution.

    The controller keeps a mean action sequence and a covariance (whose
    parameterisation is selected by ``cov_type``), draws noisy action
    sequences around the mean, and hands them to the rollout function.

    .. inheritance-diagram:: OLGaussianMPC
       :parts: 1
    """

    def __init__(self,
                 d_action,
                 action_lows,
                 action_highs,
                 horizon,
                 init_cov,
                 init_mean,
                 base_action,
                 num_particles,
                 gamma,
                 n_iters,
                 step_size_mean,
                 step_size_cov,
                 null_act_frac=0.,
                 rollout_fn=None,
                 sample_mode='mean',
                 hotstart=True,
                 squash_fn='clamp',
                 cov_type='sigma_I',
                 seed=0,
                 sample_params=None,
                 tensor_args=None,
                 fixed_actions=False):
        """Set up the sampling library and the initial Gaussian distribution.

        Parameters
        ----------
        init_cov : float
            Initial (co)variance value used by ``reset_covariance``.
        init_mean : torch.Tensor
            Initial mean action sequence; cloned and moved to ``tensor_args``.
        base_action : str
            Action to append at the end when shifting solution to next timestep
            'random' : appends random action
            'null' : appends zero action
            'repeat' : repeats second to last action
        num_particles : int
            Number of action sequences sampled at every iteration
        null_act_frac : float
            Fraction of ``num_particles`` reserved for all-zero action sequences.
        cov_type : str
            One of 'sigma_I', 'diag_AxA', 'full_AxA', 'full_HAxHA'.
        sample_params : dict, optional
            Sampling configuration; must contain the key 'type' ('stomp',
            'halton', 'random' or 'multiple'). Defaults to
            ``{'type': 'halton', 'fixed_samples': True, 'seed': 0,
            'filter_coeffs': None}``.
        tensor_args : dict, optional
            Keyword arguments used when creating tensors. Defaults to
            ``{'device': torch.device('cpu'), 'dtype': torch.float32}``.
        fixed_actions : bool
            Accepted for interface compatibility; not read by this class.

        The remaining arguments are forwarded unchanged to ``Controller``.
        """
        # Build the defaults per call: dict literals in the signature would be
        # shared by every instance (and are stored on self below).
        if sample_params is None:
            sample_params = {'type': 'halton', 'fixed_samples': True,
                             'seed': 0, 'filter_coeffs': None}
        if tensor_args is None:
            tensor_args = {'device': torch.device('cpu'), 'dtype': torch.float32}

        super(OLGaussianMPC, self).__init__(d_action,
                                            action_lows,
                                            action_highs,
                                            horizon,
                                            gamma,
                                            n_iters,
                                            rollout_fn,
                                            sample_mode,
                                            hotstart,
                                            seed,
                                            tensor_args)

        self.init_cov = init_cov
        self.init_mean = init_mean.clone().to(**self.tensor_args)
        self.cov_type = cov_type
        self.base_action = base_action
        self.num_particles = num_particles
        self.step_size_mean = step_size_mean
        self.step_size_cov = step_size_cov
        self.squash_fn = squash_fn

        self.null_act_frac = null_act_frac
        self.num_null_particles = int(null_act_frac * self.num_particles)
        # Both terms are the same truncated product, so this is always 0 as
        # written; kept as an expression to mirror the original bookkeeping.
        self.num_neg_particles = int(null_act_frac * self.num_particles) - self.num_null_particles
        self.num_nonzero_particles = (self.num_particles
                                      - self.num_null_particles
                                      - self.num_neg_particles)

        self.sample_params = sample_params
        self.sample_type = sample_params['type']

        # Initialize sampling library.
        # Two slots are held back from the sampled noise: sample_actions()
        # appends one zero-noise sequence and one copy of best_traj.
        # torch.Size is a plain tuple subclass and carries no device, so no
        # device keyword is passed to it.
        if self.sample_type == 'stomp':
            self.sample_lib = StompSampleLib(self.horizon, self.d_action,
                                             tensor_args=self.tensor_args)
            self.sample_shape = torch.Size([self.num_nonzero_particles - 2])
            self.i_ha = torch.eye(self.d_action, **self.tensor_args).repeat(1, self.horizon)
        else:
            sample_lib_classes = {
                'halton': HaltonSampleLib,
                'random': RandomSampleLib,
                'multiple': MultipleSampleLib,
            }
            # Unknown types leave sample_lib / sample_shape unset, as before.
            if self.sample_type in sample_lib_classes:
                lib_cls = sample_lib_classes[self.sample_type]
                self.sample_lib = lib_cls(self.horizon, self.d_action,
                                          tensor_args=self.tensor_args,
                                          **self.sample_params)
                self.sample_shape = torch.Size([self.num_nonzero_particles - 2])

        self.stomp_matrix = None

        # Identity used to expand the stored covariance to a full matrix.
        if self.cov_type == 'full_HAxHA':
            self.I = torch.eye(self.horizon * self.d_action, **self.tensor_args)
        else:  # AxA
            self.I = torch.eye(self.d_action, **self.tensor_args)

        # Zero-noise sequence appended to the samples in sample_actions().
        self.Z_seq = torch.zeros(1, self.horizon, self.d_action, **self.tensor_args)

        self.reset_distribution()
        if self.num_null_particles > 0:
            self.null_act_seqs = torch.zeros(self.num_null_particles,
                                             self.horizon,
                                             self.d_action,
                                             **self.tensor_args)

        self.delta = None

    def _get_action_seq(self, mode='mean'):
        """Return a squashed action sequence from the current distribution.

        Parameters
        ----------
        mode : str
            'mean' returns a copy of the mean action sequence; 'sample' adds
            one noise draw scaled by ``full_scale_tril`` to the mean.

        Raises
        ------
        ValueError
            If ``mode`` is neither 'mean' nor 'sample'.
        """
        if mode == 'mean':
            act_seq = self.mean_action.clone()
        elif mode == 'sample':
            delta = self.generate_noise(shape=torch.Size((1, self.horizon)),
                                        base_seed=self.seed_val + 123 * self.num_steps)
            act_seq = self.mean_action + torch.matmul(delta, self.full_scale_tril)
        else:
            raise ValueError('Unidentified sampling mode in get_next_action')

        act_seq = scale_ctrl(act_seq, self.action_lows, self.action_highs,
                             squash_fn=self.squash_fn)
        return act_seq

    def generate_noise(self, shape, base_seed=None):
        """Draw noise samples of the given shape from the sample library.

        Parameters
        ----------
        shape : torch.Size
            Passed to the sample library as ``sample_shape``.
        base_seed : int, optional
            Passed to the sample library as ``seed``.
        """
        # NOTE(review): sample_actions() passes this value as `base_seed=`
        # while this call uses `seed=`; confirm which keyword
        # sample_lib.get_samples actually honours.
        delta = self.sample_lib.get_samples(sample_shape=shape, seed=base_seed)
        return delta

    def sample_actions(self, state=None):
        """Build the batch of candidate action sequences for one iteration.

        The batch is: sampled noise around the mean, the mean itself (via a
        zero-noise row), the best trajectory so far and, when
        ``num_null_particles > 0``, the all-zero and negated-mean sequences.

        Parameters
        ----------
        state : optional
            Not used by this implementation.
        """
        delta = self.sample_lib.get_samples(sample_shape=self.sample_shape,
                                            base_seed=self.seed_val + self.num_steps)

        # Add zero-noise seq so mean is always a part of samples.
        delta = torch.cat((delta, self.Z_seq), dim=0)

        # Samples could be from HAxHA or AxA; reshape based on covariance type:
        #   AxA   -> samples stay N x H x A
        #   HAxHA -> samples are flattened to N x HA
        if self.cov_type == 'full_HAxHA':
            delta = delta.view(delta.shape[0], self.horizon * self.d_action)

        scaled_delta = torch.matmul(delta, self.full_scale_tril).view(delta.shape[0],
                                                                      self.horizon,
                                                                      self.d_action)

        act_seq = self.mean_action.unsqueeze(0) + scaled_delta
        act_seq = scale_ctrl(act_seq, self.action_lows, self.action_highs,
                             squash_fn=self.squash_fn)

        append_acts = self.best_traj.unsqueeze(0)

        # Append zero actions (for stopping) and negative action particles.
        if self.num_null_particles > 0:
            neg_action = -1.0 * self.mean_action.unsqueeze(0)
            neg_act_seqs = neg_action.expand(self.num_neg_particles, -1, -1)
            append_acts = torch.cat((append_acts, self.null_act_seqs, neg_act_seqs), dim=0)

        act_seq = torch.cat((act_seq, append_acts), dim=0)
        return act_seq

    def generate_rollouts(self, state):
        """Sample a batch of action sequences and roll them out.

        Parameters
        ----------
        state : dict or np.ndarray
            Initial state to set the simulation env to

        Returns
        -------
        The value returned by ``self._rollout_fn(state, act_seq)``.
        """
        # Sample noise from covariance of current control distribution.
        act_seq = self.sample_actions(state=state)
        trajectories = self._rollout_fn(state, act_seq)
        return trajectories

    def _shift(self, shift_steps=1):
        """Shift the mean and best trajectory forward by ``shift_steps``.

        The sequences are rolled along the first dimension and the tail is
        refilled according to ``self.base_action``.

        Raises
        ------
        NotImplementedError
            If ``self.base_action`` is not 'random', 'null' or 'repeat'.
        """
        if shift_steps == 0:
            return

        self.mean_action = self.mean_action.roll(-shift_steps, 0)
        self.best_traj = self.best_traj.roll(-shift_steps, 0)

        if self.base_action == 'random':
            # NOTE(review): only the last row is refreshed here; for
            # shift_steps > 1 the other wrapped-around rows keep their rolled
            # values. Confirm whether that is intended.
            self.mean_action[-1] = self.generate_noise(
                shape=torch.Size((1, 1)),
                base_seed=self.seed_val + 123 * self.num_steps)
            self.best_traj[-1] = self.generate_noise(
                shape=torch.Size((1, 1)),
                base_seed=self.seed_val + 123 * self.num_steps)
        elif self.base_action == 'null':
            self.mean_action[-shift_steps:].zero_()
            self.best_traj[-shift_steps:].zero_()
        elif self.base_action == 'repeat':
            # Fill the tail with the last action that was not wrapped around.
            self.mean_action[-shift_steps:] = self.mean_action[-shift_steps - 1].clone()
            self.best_traj[-shift_steps:] = self.best_traj[-shift_steps - 1].clone()
        else:
            raise NotImplementedError("invalid option for base action during shift")

    def reset_mean(self):
        """Reset the mean action sequence and best trajectory to ``init_mean``."""
        self.mean_action = self.init_mean.clone()
        self.best_traj = self.mean_action.clone()

    def reset_covariance(self):
        """Reset covariance, its inverse and its scale factor from ``init_cov``.

        Raises
        ------
        ValueError
            If ``self.cov_type`` is not a supported covariance type.
        """
        if self.cov_type == 'sigma_I':
            # Single scalar variance shared by every action dimension.
            self.cov_action = torch.tensor(self.init_cov, **self.tensor_args)
            self.init_cov_action = self.init_cov
            self.inv_cov_action = 1.0 / self.init_cov
            self.scale_tril = torch.sqrt(self.cov_action)
        elif self.cov_type == 'diag_AxA':
            # One variance per action dimension.
            self.init_cov_action = torch.tensor([self.init_cov] * self.d_action,
                                                **self.tensor_args)
            self.cov_action = self.init_cov_action
            self.inv_cov_action = 1.0 / self.cov_action
            self.scale_tril = torch.sqrt(self.cov_action)
        elif self.cov_type == 'full_AxA':
            self.init_cov_action = torch.diag(
                torch.tensor([self.init_cov] * self.d_action, **self.tensor_args))
            self.cov_action = self.init_cov_action
            self.scale_tril = matrix_cholesky(self.cov_action)
            self.inv_cov_action = torch.cholesky_inverse(self.scale_tril)
        elif self.cov_type == 'full_HAxHA':
            self.init_cov_action = torch.diag(
                torch.tensor([self.init_cov] * (self.horizon * self.d_action),
                             **self.tensor_args))
            self.cov_action = self.init_cov_action
            # NOTE(review): torch.cholesky is deprecated in favour of
            # torch.linalg.cholesky; kept for compatibility with the torch
            # version this project targets.
            self.scale_tril = torch.cholesky(self.cov_action)
            self.inv_cov_action = torch.cholesky_inverse(self.scale_tril)
        else:
            raise ValueError('Unidentified covariance type in update_distribution')

    def reset_distribution(self):
        """
        Reset control distribution
        """
        self.reset_mean()
        self.reset_covariance()

    def _calc_val(self, cost_seq, act_seq):
        """Not implemented in this class; always raises ``NotImplementedError``."""
        raise NotImplementedError("_calc_val not implemented")

    @property
    def squashed_mean(self):
        """Mean action sequence after applying the action-limit squashing."""
        return scale_ctrl(self.mean_action, self.action_lows, self.action_highs,
                          squash_fn=self.squash_fn)

    @property
    def full_cov(self):
        """Covariance expanded to a full matrix (None for unknown ``cov_type``)."""
        if self.cov_type == 'sigma_I':
            return self.cov_action * self.I
        elif self.cov_type == 'diag_AxA':
            return torch.diag(self.cov_action)
        elif self.cov_type == 'full_AxA':
            return self.cov_action
        elif self.cov_type == 'full_HAxHA':
            return self.cov_action

    @property
    def full_inv_cov(self):
        """Inverse covariance expanded to a full matrix (None for unknown ``cov_type``)."""
        if self.cov_type == 'sigma_I':
            return self.inv_cov_action * self.I
        elif self.cov_type == 'diag_AxA':
            return torch.diag(self.inv_cov_action)
        elif self.cov_type == 'full_AxA':
            return self.inv_cov_action
        elif self.cov_type == 'full_HAxHA':
            return self.inv_cov_action

    @property
    def full_scale_tril(self):
        """Scale factor expanded to a full matrix (None for unknown ``cov_type``)."""
        if self.cov_type == 'sigma_I':
            return self.scale_tril * self.I
        elif self.cov_type == 'diag_AxA':
            return torch.diag(self.scale_tril)
        elif self.cov_type == 'full_AxA':
            return self.scale_tril
        elif self.cov_type == 'full_HAxHA':
            return self.scale_tril

    @property
    def entropy(self):
        """Entropy of the distribution, computed from ``full_scale_tril``."""
        ent_L = gaussian_entropy(L=self.full_scale_tril)
        return ent_L