# Source code for storm_kit.mpc.control.control_utils

#
# MIT License
#
# Copyright (c) 2020-2021 NVIDIA CORPORATION.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import math

import numpy as np
import torch
from torch.distributions.multivariate_normal import MultivariateNormal
import ghalton


def scale_ctrl(ctrl, action_lows, action_highs, squash_fn='clamp'):
    if len(ctrl.shape) == 1:
        ctrl = ctrl[np.newaxis, :, np.newaxis]
    act_half_range = (action_highs - action_lows) / 2.0
    act_mid_range = (action_highs + action_lows) / 2.0
    if squash_fn == 'clamp':
        # ctrl = torch.clamp(ctrl, action_lows[0], action_highs[0])
        ctrl = torch.max(torch.min(ctrl, action_highs), action_lows)
        return ctrl
    elif squash_fn == 'clamp_rescale':
        ctrl = torch.clamp(ctrl, -1.0, 1.0)
    elif squash_fn == 'tanh':
        ctrl = torch.tanh(ctrl)
    elif squash_fn == 'identity':
        return ctrl
    return act_mid_range.unsqueeze(0) + ctrl * act_half_range.unsqueeze(0)
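
# Example usage (a minimal sketch, not part of the original module): squash a
# batch of unbounded controls into per-dimension action limits. Shapes follow
# the [batch, horizon, d_action] convention used elsewhere in this file.
def _example_scale_ctrl():
    d_action = 7
    action_lows = -2.0 * torch.ones(d_action)
    action_highs = 2.0 * torch.ones(d_action)
    raw_ctrl = torch.randn(4, 30, d_action)
    # 'tanh' maps raw controls smoothly into [lows, highs];
    # 'clamp' hard-limits them instead.
    squashed = scale_ctrl(raw_ctrl, action_lows, action_highs, squash_fn='tanh')
    assert squashed.min() >= -2.0 and squashed.max() <= 2.0
    return squashed
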
#######################
## STOMP Covariance  ##
#######################
def get_stomp_cov(horizon, d_action,
                  tensor_args={'device': torch.device('cpu'), 'dtype': torch.float32},
                  cov_mode='vel',
                  RETURN_R=False):
    """ Computes the covariance matrix following the STOMP motion planner.

    Coefficients from here: https://en.wikipedia.org/wiki/Finite_difference_coefficient
    More info here: https://github.com/ros-industrial/stomp_ros/blob/7fe40fbe6ad446459d8d4889916c64e276dbf882/stomp_core/src/utils.cpp#L36
    """
    acc_fd_array = [0, -1 / 12, 4 / 3, -5 / 2, 4 / 3, -1 / 12, 0]
    #acc_fd_array = [1/90, -3/20, 3/2, -49/18, 3/2, -3/20, 1/90]
    #jerk_fd_array = [0, 1 / 12.0, -17 / 12.0, 46 / 12.0, -46 / 12.0, 17 / 12.0, -1 / 12.0]
    jerk_fd_array = [1 / 8.0, -1, 13 / 8, 0, -13 / 8, 1, -1 / 8]
    #snap_fd_array = [-1/6, 2.0, -13/2, 28/3, -13/2, 2, -1/6]
    snap_fd_array = [0, 1, -4, 6, -4, 1, 0]
    #vel_fd_array = [0, 1.0/12.0, -2.0/3.0, 0, 2.0/3.0, -1.0/12.0, 0]
    vel_fd_array = [0, 0, 1, -2, 1, 0, 0]

    fd_array = acc_fd_array
    A = torch.zeros((d_action * horizon, d_action * horizon),
                    device=tensor_args['device'], dtype=torch.float64)

    if cov_mode == 'vel':
        for k in range(d_action):
            for i in range(horizon):
                for j in range(-3, 4):
                    # skip stencil entries that fall outside the horizon
                    index = i + j
                    if index < 0:
                        continue
                    if index >= horizon:
                        continue
                    A[k * horizon + i, k * horizon + index] = fd_array[j + 3]
    elif cov_mode == 'acc':
        for k in range(d_action):
            for i in range(horizon):
                for j in range(-3, 4):
                    index = i + j
                    if index < 0:
                        continue
                    if index >= horizon:
                        continue
                    if index >= horizon / 2:
                        A[k * horizon + i, k * horizon - index - horizon // 2 - 1] = fd_array[j + 3]
                    else:
                        A[k * horizon + i, k * horizon + index] = fd_array[j + 3]

    R = torch.matmul(A.transpose(-2, -1), A)
    cov = torch.inverse(R)
    cov = cov / torch.max(torch.abs(cov))

    # also compute the cholesky decomposition
    # (torch.linalg.cholesky replaces the deprecated torch.cholesky):
    scale_tril = torch.linalg.cholesky(cov)

    cov = cov.to(**tensor_args)
    scale_tril = scale_tril.to(**tensor_args)
    scale_tril = scale_tril / torch.max(scale_tril)

    if RETURN_R:
        return cov, scale_tril, R
    return cov, scale_tril
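
# Example usage (a minimal sketch, not part of the original module): draw
# time-correlated perturbations for a 2-DOF, 30-step rollout by sampling with
# the Cholesky factor returned above. The correlation across timesteps is what
# gives STOMP its smooth exploration noise.
def _example_stomp_sampling():
    horizon, d_action = 30, 2
    cov, scale_tril = get_stomp_cov(horizon, d_action)
    dist = MultivariateNormal(loc=torch.zeros(d_action * horizon),
                              scale_tril=scale_tril)
    eps = dist.sample(sample_shape=(64,))   # [64, d_action * horizon]
    return eps.view(64, d_action, horizon)  # per-dimension time series
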
#######################
## Gaussian Sampling ##
#######################
def generate_noise(cov, shape, base_seed, filter_coeffs=None, device=torch.device('cpu')):
    """
    Generate correlated Gaussian samples using autoregressive process
    """
    torch.manual_seed(base_seed)
    N = cov.shape[0]
    m = MultivariateNormal(loc=torch.zeros(N).to(device), covariance_matrix=cov)
    eps = m.sample(sample_shape=shape)
    # eps = np.random.multivariate_normal(mean=np.zeros((N,)), cov=cov, size=shape)
    if filter_coeffs is not None:
        # unpack the AR(2) filter coefficients only when filtering is requested
        # (unpacking unconditionally would crash when filter_coeffs is None)
        beta_0, beta_1, beta_2 = filter_coeffs
        for i in range(2, eps.shape[1]):
            eps[:, i, :] = beta_0*eps[:, i, :] + beta_1*eps[:, i-1, :] + beta_2*eps[:, i-2, :]
    return eps
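
# Example usage (a sketch, not part of the original module): sample noise for
# 100 rollouts over a 30-step horizon with 7 action dimensions, smoothing each
# timestep with an order-2 autoregressive filter.
def _example_generate_noise():
    d_action = 7
    cov = torch.eye(d_action)
    eps = generate_noise(cov, shape=(100, 30), base_seed=0,
                         filter_coeffs=(0.5, 0.3, 0.2))
    return eps  # shape: [100, 30, 7]
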
def generate_noise_np(cov, shape, base_seed, filter_coeffs=None):
    """
    Generate correlated noisy samples using autoregressive process
    """
    np.random.seed(base_seed)
    N = cov.shape[0]
    eps = np.random.multivariate_normal(mean=np.zeros((N,)), cov=cov, size=shape)
    if filter_coeffs is not None:
        # unpack AR(2) filter coefficients only when filtering is requested
        beta_0, beta_1, beta_2 = filter_coeffs
        for i in range(2, eps.shape[1]):
            eps[:, i, :] = beta_0*eps[:, i, :] + beta_1*eps[:, i-1, :] + beta_2*eps[:, i-2, :]
    return eps
###########################
## Quasi-Random Sampling ##
###########################
def generate_prime_numbers(num):
    def is_prime(n):
        for j in range(2, (n // 2) + 1):
            if n % j == 0:
                return False
        return True

    primes = [0] * num
    primes[0] = 2
    curr_num = 1
    for i in range(1, num):
        while True:
            curr_num += 2
            if is_prime(curr_num):
                primes[i] = curr_num
                break
    return primes
def generate_van_der_corput_sample(idx, base):
    f, r = 1.0, 0
    while idx > 0:
        f /= base * 1.0
        r += f * (idx % base)
        idx = idx // base
    return r
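
# Sanity check (not part of the original module): the base-2 van der Corput
# sequence mirrors the binary digits of the index about the radix point, so
# the first few values are 1/2, 1/4, 3/4, 1/8, ...
def _example_van_der_corput():
    values = [generate_van_der_corput_sample(idx, base=2) for idx in range(1, 5)]
    assert values == [0.5, 0.25, 0.75, 0.125]
    return values
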
def generate_van_der_corput_samples_batch(idx_batch, base):
    inp_device = idx_batch.device
    batch_size = idx_batch.shape[0]
    f = 1.0
    r = torch.zeros(batch_size, device=inp_device)
    while torch.any(idx_batch > 0):
        f /= base * 1.0
        r += f * (idx_batch % base)
        idx_batch = idx_batch // base
    return r
# def generate_van_der_corput_samples_batch_2(idx_batch, bases):
#     inp_device = idx_batch.device
#     batch_size = idx_batch.shape[0]
#     f = torch.ones(batch_size, device=inp_device)
#     r = torch.zeros(batch_size, device=inp_device)
#     while torch.any(idx_batch > 0):
#         f /= bases * 1.0
#         r += f * (idx_batch % base)
#         idx_batch = idx_batch // base
#     return r
def generate_halton_samples(num_samples, ndims, bases=None, use_ghalton=True,
                            seed_val=123, device=torch.device('cpu'),
                            float_dtype=torch.float64):
    if not use_ghalton:
        samples = torch.zeros(num_samples, ndims, device=device, dtype=float_dtype)
        if not bases:
            bases = generate_prime_numbers(ndims)
        idx_batch = torch.arange(1, num_samples + 1, device=device)
        for dim in range(ndims):
            samples[:, dim] = generate_van_der_corput_samples_batch(idx_batch, bases[dim])
    else:
        if ndims <= 100:
            perms = ghalton.EA_PERMS[:ndims]
            sequencer = ghalton.GeneralizedHalton(perms)
        else:
            sequencer = ghalton.GeneralizedHalton(ndims, seed_val)
        samples = torch.tensor(sequencer.get(num_samples), device=device, dtype=float_dtype)
    return samples
def generate_gaussian_halton_samples(num_samples, ndims, bases=None, use_ghalton=True,
                                     seed_val=123, device=torch.device('cpu'),
                                     float_dtype=torch.float64):
    uniform_halton_samples = generate_halton_samples(num_samples, ndims, bases,
                                                     use_ghalton, seed_val,
                                                     device, float_dtype)
    # map uniform samples through the inverse standard normal CDF
    gaussian_halton_samples = torch.sqrt(torch.tensor([2.0], device=device, dtype=float_dtype)) \
        * torch.erfinv(2 * uniform_halton_samples - 1)
    return gaussian_halton_samples
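
# Example usage (a sketch, not part of the original module): Halton points are
# uniform on (0, 1), and sqrt(2) * erfinv(2u - 1) is the inverse standard
# normal CDF, so the transformed samples should be close to zero mean.
def _example_gaussian_halton():
    samples = generate_gaussian_halton_samples(num_samples=2048, ndims=4)
    assert torch.allclose(samples.mean(dim=0),
                          torch.zeros(4, dtype=samples.dtype), atol=0.05)
    return samples
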
def generate_gaussian_sobol_samples(num_samples, ndims, seed_val,
                                    device=torch.device('cpu'), float_dtype=torch.float64):
    soboleng = torch.quasirandom.SobolEngine(dimension=ndims, scramble=True, seed=seed_val)
    # cast to the requested dtype so the erfinv transform runs in full precision
    uniform_sobol_samples = soboleng.draw(num_samples).to(device=device, dtype=float_dtype)
    gaussian_sobol_samples = torch.sqrt(torch.tensor([2.0], device=device, dtype=float_dtype)) \
        * torch.erfinv(2 * uniform_sobol_samples - 1)
    return gaussian_sobol_samples
########################
## Gaussian Utilities ##
########################
def gaussian_logprob(mean, cov, x, cov_type="full"):
    """
    Calculate gaussian log prob for given input batch x

    Parameters
    ----------
    mean (np.ndarray): [N x num_samples] batch of means
    cov (np.ndarray): [N x N] covariance matrix
    x (np.ndarray): [N x num_samples] batch of sample values

    Returns
    -------
    log_prob (np.ndarray): [num_samples] log probability of each sample
    """
    N = cov.shape[0]
    if cov_type == "diagonal":
        cov_diag = cov.diagonal()
        cov_inv = np.diag(1.0 / cov_diag)
        cov_logdet = np.sum(np.log(cov_diag))
    else:
        cov_logdet = np.log(np.linalg.det(cov))
        cov_inv = np.linalg.inv(cov)
    diff = (x - mean).T
    mahalanobis_dist = -0.5 * np.sum((diff @ cov_inv) * diff, axis=1)
    const1 = -0.5 * N * np.log(2.0 * np.pi)
    const2 = -0.5 * cov_logdet
    log_prob = mahalanobis_dist + const1 + const2
    return log_prob
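
# Example usage (a sketch, not part of the original module): log-probability of
# three 2-D samples under a zero-mean unit Gaussian. Note the [N x num_samples]
# layout: samples are stored column-wise.
def _example_gaussian_logprob():
    mean = np.zeros((2, 1))
    cov = np.eye(2)
    x = np.array([[0.0, 1.0, -1.0],
                  [0.0, 0.0,  2.0]])  # three samples, column-wise
    log_prob = gaussian_logprob(mean, cov, x)
    # the mode of a 2-D standard normal has log density -log(2*pi)
    assert np.isclose(log_prob[0], -np.log(2.0 * np.pi))
    return log_prob
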
def gaussian_logprobgrad(mean, cov, x, cov_type="full"):
    if cov_type == "diagonal":
        cov_inv = np.diag(1.0 / cov.diagonal())
    else:
        cov_inv = np.linalg.inv(cov)
    diff = (x - mean).T
    grad = diff @ cov_inv
    return grad
def gaussian_entropy(cov=None, L=None):
    """
    Entropy of multivariate gaussian given either covariance
    or cholesky decomposition of covariance
    """
    if cov is not None:
        inp_device = cov.device
        cov_logdet = torch.log(torch.det(cov))
        N = cov.shape[0]
    else:
        inp_device = L.device
        cov_logdet = 2.0 * torch.sum(torch.log(torch.diagonal(L)))
        N = L.shape[0]
    term1 = 0.5 * cov_logdet
    # pre-calculated constant: 1.0 + log(2.0 * pi) = 2.837877066
    term2 = 0.5 * N * 2.837877066
    ent = term1 + term2
    return ent.to(inp_device)
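
# Sanity check (not part of the original module): for an identity covariance of
# dimension N, log det(I) = 0 and the entropy reduces to 0.5 * N * (1 + log(2*pi)),
# which is exactly the pre-computed 2.837877066 constant above.
def _example_gaussian_entropy():
    N = 5
    ent = gaussian_entropy(cov=torch.eye(N))
    assert torch.isclose(ent, torch.tensor(0.5 * N * 2.837877066), atol=1e-5)
    return ent
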
def gaussian_kl(mean0, cov0, mean1, cov1, cov_type="full"):
    """
    KL-divergence between Gaussians given mean and covariance
    KL(p||q) = E_{p}[log(p) - log(q)]
    """
    N = cov0.shape[0]
    if cov_type == "diagonal":
        cov1_diag = cov1.diagonal()
        cov1_inv = np.diag(1.0 / cov1_diag)
        cov0_logdet = np.sum(np.log(cov0.diagonal()))
        cov1_logdet = np.sum(np.log(cov1_diag))
    else:
        cov1_inv = np.linalg.inv(cov1)
        cov0_logdet = np.log(np.linalg.det(cov0))
        cov1_logdet = np.log(np.linalg.det(cov1))

    term1 = 0.5 * np.trace(cov1_inv @ cov0)
    diff = (mean1 - mean0).T
    mahalanobis_dist = 0.5 * np.sum((diff @ cov1_inv) * diff, axis=1)
    term3 = 0.5 * (-1.0 * N + cov1_logdet - cov0_logdet)
    return term1 + mahalanobis_dist + term3
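
# Sanity check (not part of the original module): the KL divergence of a
# Gaussian with itself is zero; note that KL is asymmetric in general.
def _example_gaussian_kl():
    mean = np.zeros((3, 1))
    cov = np.diag([1.0, 2.0, 3.0])
    kl = gaussian_kl(mean, cov, mean, cov)
    assert np.allclose(kl, 0.0)
    return kl
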
def cost_to_go(cost_seq, gamma_seq):
    """
    Calculate (discounted) cost to go for given cost sequence
    """
    cost_seq = gamma_seq * cost_seq  # discounted cost sequence
    # cumulative sum from the back gives the cost to go,
    # but scaled by [1, gamma, gamma**2, ...]
    cost_seq = torch.fliplr(torch.cumsum(torch.fliplr(cost_seq), axis=-1))
    cost_seq /= gamma_seq  # un-scale it to get true discounted cost to go
    return cost_seq
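
# Worked example (not part of the original module): with unit costs and
# gamma = 0.5, gamma_seq = [1, 0.5, 0.25], and the discounted cost to go from
# each timestep is [1.75, 1.5, 1.0].
def _example_cost_to_go():
    cost_seq = torch.ones(1, 3)
    gamma_seq = torch.tensor([[1.0, 0.5, 0.25]])
    ctg = cost_to_go(cost_seq, gamma_seq)
    assert torch.allclose(ctg, torch.tensor([[1.75, 1.5, 1.0]]))
    return ctg
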
def cost_to_go_np(cost_seq, gamma_seq):
    """
    Calculate (discounted) cost to go for given cost sequence
    """
    cost_seq = gamma_seq * cost_seq  # discounted cost sequence
    # cumulative sum from the back, scaled by [1, gamma, gamma**2, ...]
    cost_seq = np.cumsum(cost_seq[:, ::-1], axis=-1)[:, ::-1]
    cost_seq /= gamma_seq  # un-scale it to get true discounted cost to go
    return cost_seq
##############
## Cholesky ##
##############
def matrix_cholesky(A):
    L = torch.zeros_like(A)
    for i in range(A.shape[-1]):
        for j in range(i + 1):
            s = 0.0
            for k in range(j):
                s = s + L[i, k] * L[j, k]
            L[i, j] = torch.sqrt(A[i, i] - s) if (i == j) else \
                      (1.0 / L[j, j] * (A[i, j] - s))
    return L
# Batched Cholesky decomp
def batch_cholesky(A):
    L = torch.zeros_like(A)
    for i in range(A.shape[-1]):
        for j in range(i + 1):
            s = 0.0
            for k in range(j):
                s = s + L[..., i, k] * L[..., j, k]
            L[..., i, j] = torch.sqrt(A[..., i, i] - s) if (i == j) else \
                           (1.0 / L[..., j, j] * (A[..., i, j] - s))
    return L
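
# Sanity check (not part of the original module): the loop version should match
# torch's built-in Cholesky factorization on a batch of random SPD matrices.
def _example_batch_cholesky():
    A = torch.randn(4, 3, 3)
    spd = A @ A.transpose(-2, -1) + 3 * torch.eye(3)  # make SPD
    L = batch_cholesky(spd)
    assert torch.allclose(L, torch.linalg.cholesky(spd), atol=1e-4)
    return L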