#PACKAGES -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

using Optim, Plots, DelimitedFiles, LinearAlgebra, Random, StatsBase, FiniteDifferences, LaTeXStrings , EasyFit, Printf, FFTW, Pkg, Noise, Clustering, Dierckx, BSplineKit, MultivariateStats, Flux, Combinatorics, Bigsimr, DataFrames, JLD

#FUNCTIONS -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

#functions for Todorov's and numerical GD optimization, with useful functions to get model predictions.
cd(@__DIR__)#to go back to the directory of the script
include("../../MyFunctions/functions_sensory_motor_task.jl")

#Functions for full GD algorithms (1D and multiple dimensions)
cd(@__DIR__)#to go back to the directory of the script
include("../../MyFunctions/functions_full_GD_algorithms.jl")

#TASK PARAMETERS, we follow (Todorov, 2005) -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
dimension_of_state = 4 #We chose a system with a 4D state vector, instead of the 5D system of Todorov: see comments about that in the document "functions_Reaching_task_application".
dimension_of_control = 1 #1D control.
observation_dimension = 1 #We chose a 1D sensory feedback for simplicity and to give major emphasis to the visual stimuli (for which the multiplicative and internal noise 
#description could be more significant: eccentricity for multiplicative noise and memory leak for what concerns the internal noise - considering a case in which we need to
#remember the position of the target, like in the navigation task).

Δt = 0.010 #seconds
N_steps = 100 #we use the same time steps as the estimation task of (Todorov, 2005)
m = 1 #Kg, mass of the hand, modelled as a point mass
target_position_mean = 0.15 #meters; mean of the gaussian distribution from which the target position is extracted. We use here the params used in the estimation task of 
#(Todorov, 2005), so that the target is in the periphery and multiplicative sensory noise plays a role. Note that three more means are used: 0.05m and 0.25m.
target_position_std = 0.00 #Note that optimal controls and estimators could depende on the target position: consider putting zero variance of that to simplify the problem at
#the moment. In (Todorov, 2005) they use (0.05m)^2
tau_1 = 0.04 #seconds; tau_1 and tau_2 are the time constants of the second oreder low pass filter to map the control signal into the force acting on the hand
tau_2 = 0.04 #seconds
r = 0.00001 #as in the reaching atsk of (Todorov, 2005)
w_v = 0.2 #w_v and #w_f are computed by considering Todorov's choices in (Todorov, 2005) and rescaling them to our case in which target_displacement is 0.15 and N_steps = 100 
#(we thus just divided by 2 the values used in the paper)
w_f = 0.01 

#NOISE terms 
σ_ξ = 0.00 #we only use control dependent noise acting on the dynamics to mimic the reaching task of (Todorov, 2005).  
σ_ω_add_sensory_noise = 0.00 #we only use sensory dependent noise acting on the dynamics to mimic the estimation task of (Todorov, 2005).  
σ_ρ_mult_sensory_noise = 0.5 #as in the estimation task of (Todorov, 2005).
σ_ϵ_mult_control_noise = 0.5 #as in the reaching task of (Todorov, 2005).

σ_η_internal_noise = 0.0 #meters, as in the estimation task of (Todorov, 2005).
#we also consider here possible internal fluctuations on the other components of the state estimate 
σ_η_internal_noise_velocity = 0.0
σ_η_internal_noise_force = 0.0
σ_η_internal_noise_filtered_control = 0.0
#NOTE THAT IN THE PREVIOUS SIMULATIONS WE HAVE ALWAYS USED σ_η_internal_noise_velocity = σ_η_internal_noise_force = σ_η_internal_noise_filtered_control = 0.1, CHANGING ONLY 
#σ_η_internal_noise_position. ONLY FOR THE SIMULATION WITHOUT INTERNAL NOISE WE SET ALL THE MENTIONED VARIABLES AT ZERO.

#Initial conditions (mean state and state estimate, and their covariances) -- Note that initial state and state estimate are considered to be uncorrelated at t=1 (initial time step)

#Mean initial state and state estimates -- Note that we assume <x_1> = <z_1>

#note that x_state_0 is the initial mean state of the vector
x_state_mean_0 = zeros(dimension_of_state) #meters. 
x_state_mean_0[1] = target_position_mean 
x_state_mean_0[2] = 0
x_state_mean_0[3] = 0
x_state_mean_0[4] = 0

x_state_estimate_mean_0 = zeros(dimension_of_state)
x_state_estimate_mean_0[:] .= x_state_mean_0[:]

#State covariance 
target_position_variance = target_position_std^2
velocity_initial_variance = 0
force_initial_variance = 0
filtered_control_initial_variance = 0

Σ_1_x = Diagonal(ones(dimension_of_state)) #intial covariance of the state

Σ_1_x[1,1] = target_position_variance
Σ_1_x[2,2] = velocity_initial_variance
Σ_1_x[3,3] = force_initial_variance
Σ_1_x[4,4] = filtered_control_initial_variance

#State estimate covariance 
target_position_estimate_initial_variance = 0
velocity_estimate_initial_variance = 0
force_estimate_initial_variance = 0
filtered_control_estimate_initial_variance = 0

Σ_1_z = Diagonal(ones(dimension_of_state)) #intial covariance of the state estimate 

Σ_1_z[1,1] = target_position_estimate_initial_variance
Σ_1_z[2,2] = velocity_estimate_initial_variance
Σ_1_z[3,3] = force_estimate_initial_variance
Σ_1_z[4,4] = filtered_control_estimate_initial_variance

#Initial condition of raw moments.
#S_0 is a 4x4 block matrix containing the inital condition for the propagation of the raw moments (mxm matrices, where m is the dimension of the state), and it's defined this way: 
#S_0 = [<x_1*transpose(x_1)>, <x_1*transpose(z_1)>; <z_1*transpose(x_1)>, <z_1*transpose(z_1)>], where x is the state and z the state estimate. 
#This variable is used as an argument for "raw_moments_propagation_for_full_GD", used for the control (and estimation) optimization of the full GD.

S_0 = [zeros(dimension_of_state, dimension_of_state) for _ in 1:2, _ in 1:2]

S_0[1,1] .= Σ_1_x .+ x_state_mean_0*transpose(x_state_mean_0)
S_0[1,2] .= x_state_estimate_mean_0*transpose(x_state_mean_0)
S_0[2,1] .= x_state_mean_0*transpose(x_state_estimate_mean_0)
S_0[2,2] .= Σ_1_z .+ x_state_estimate_mean_0*transpose(x_state_estimate_mean_0)

#Matrices of the problem 
#Define the dynamical system
A = zeros(dimension_of_state, dimension_of_state)
B = zeros(dimension_of_state, dimension_of_control)
H = zeros(observation_dimension, dimension_of_state)
D = zeros(observation_dimension, dimension_of_state)

Ω_ξ = Diagonal([0, σ_ξ^2, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
Ω_ω = σ_ω_add_sensory_noise^2 #Additive sensory noise (1D sensory feedback)
Ω_η = Diagonal([σ_η_internal_noise, σ_η_internal_noise_velocity^2, σ_η_internal_noise_force^2, σ_η_internal_noise_filtered_control^2]) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)

A[1,:] = [1, Δt, 0, 0]
A[2,:] = [0, 1, Δt/m, 0]
A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2]
A[4,:] = [0, 0, 0, 1-Δt/tau_1]

B[:] = [0, 0, 0, Δt/tau_1]
C = σ_ϵ_mult_control_noise .* B
H[1,:] = [1, 0, 0, 0] #we're selecting only the position as observable 

D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:]

#Matrix for cost function (quadratic form)
p_vec = [1; 0; 0; 0]
v_vec = [0; w_v; 0; 0]
f_vec = [0; 0; w_f; 0]

R_matrix = zeros(dimension_of_control, dimension_of_control, N_steps)
for i in 1:N_steps-1
    R_matrix[:,:,i] .= r/(N_steps-1)
end

Q_matrix = zeros(dimension_of_state, dimension_of_state, N_steps)
Q_N = p_vec*transpose(p_vec) .+ v_vec*transpose(v_vec) .+ f_vec*transpose(f_vec) #Matrix for the cost at the last time step 
Q_matrix[:,:,N_steps] .= Q_N[:,:]


#OPTIMIZATION PARAMETERS -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

#Todorov's algorithm
N_iter_Todorov = 500

#Numerical GD algorithm
algorithm_GD = GradientDescent()
#iterations_GD = 5000
iterations_GD = 50000
# Specify options for the optimization algorithm
options_GD = Optim.Options(
    # Step size for gradient descent
    iterations = iterations_GD,  # Number of iterations
    store_trace = false   # Show optimization trace
)

#MONTECARLO SIMULATIONS PARAMETERS -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

num_trials_MC_runs = 50000
random_seed_MC = rand(1:1000000)

#TODOROV OPTIMIZATION -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

println("\n------- (1) Todorov's optimization --------------------------\n")

L_TOD, K_TOD, expected_cost_TOD = Todorov_optimization_reaching_task_application_4D_state(Δt, N_steps, m, tau_1, tau_2, x_state_mean_0, Σ_1_x, Σ_1_z, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η, N_iter_Todorov)

#Montecarlo simulations
final_cost_average_TOD, final_cost_average_std_TOD, p_t_average_TOD, p_t_average_std_TOD, p_hat_t_average_TOD, p_hat_t_average_std_TOD, control_t_average_TOD, control_t_average_std_TOD, p_t_TOD, p_hat_t_TOD, control_t_TOD, accumulated_cost_t_TOD = get_model_predictions_reaching_task_application_4D_state(num_trials_MC_runs, K_TOD, L_TOD, Δt, N_steps, m, tau_1, tau_2, x_state_mean_0, Σ_1_x, Σ_1_z, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η, random_seed_MC)

#Expected cost using moment's propagation
expected_cost_mom_prop_TOD = moments_propagation_reaching_task_application_4D_state(K_TOD, L_TOD, Δt, N_steps, m, tau_1, tau_2, x_state_mean_0, Σ_1_x, Σ_1_z, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η)

println("- Expected cost (TOD):$expected_cost_TOD\n")
println("- Expected cost using moment propagation (Todorov): $expected_cost_mom_prop_TOD\n")
println("- Montecarlo - cost function (Todorov): $final_cost_average_TOD  (error: $final_cost_average_std_TOD)\n")

#GD OPTIMIZATION -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

println("\n------- (2) Numerical GD optimization --------------------------\n")

#Objective function
cost_function_optimization_GD(x) = moments_propagation_reaching_task_application_4D_state_GD_optimization(x, A, B, C, D, H, Ω_ξ, Ω_η, Ω_ω, Q_N, r, x_state_mean_0, Σ_1_x, Σ_1_z, N_steps)

#Initial condition for the value to be optimized. Note that the vector x to be optimized has the following shape: x = Vector(1:state_dimension*(2*N_steps - 2)), with the first
#state_dimension*(N_steps-1) corresponding to the filter values, and the state_dimension*(N_steps-1) remaining ones are the control gains. We could impose that the filters are zero
#at the fiorst time step, since at the beginning the agent has perfect knowledge of the system's state [we leave it free to vary at the moment, since it doesn't change the # of free
#parameters significantly]. Therefore, we have state_dimension*(2*N_steps - 2) values to be optimized. With N_steps = 100 and state_dimension we have then 392 parameters. 
x_0 = zeros(dimension_of_state*(2*N_steps - 2))

x_0[1:dimension_of_state*(N_steps - 1)] = K_TOD[:] #initial condition for the filters
x_0[dimension_of_state*(N_steps - 1)+1:end] = L_TOD[:] #initial condition for the control gains

result_GD = optimize(cost_function_optimization_GD, x_0, algorithm_GD, options_GD)

x_opt_GD = result_GD.minimizer
expected_cost_GD_optim = result_GD.minimum

K_GD = reshape(x_opt_GD[1:dimension_of_state*(N_steps - 1)], dimension_of_state, observation_dimension, N_steps - 1)
L_GD = reshape(x_opt_GD[dimension_of_state*(N_steps - 1)+1:end], 1, dimension_of_state, N_steps - 1)

#Montecarlo simulations [Note that we use the same random seed as the one used to compute the accumulated cost (with MC) for Todorov's solution]
final_cost_average_GD, final_cost_average_std_GD, p_t_average_GD, p_t_average_std_GD, p_hat_t_average_GD, p_hat_t_average_std_GD, control_t_average_GD, control_t_average_std_GD, p_t_GD, p_hat_t_GD, control_t_GD, accumulated_cost_t_GD = get_model_predictions_reaching_task_application_4D_state(num_trials_MC_runs, K_GD, L_GD, Δt, N_steps, m, tau_1, tau_2, x_state_mean_0, Σ_1_x, Σ_1_z, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η, random_seed_MC)

println("- Expected cost (moment's propagation):$expected_cost_GD_optim\n")
println("- Montecarlo - cost function (GD): $final_cost_average_GD  (error: $final_cost_average_std_GD)\n")

#SAVE OPTIMIZED ESTIMATORS AND CONTROLLERS FOR THE TWO APPROACHES -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

cd(@__DIR__)#to go back to the directory of the script
folder_location = "../../Data/Multi_D_problems/sensory_motor_task/New_Data"

#Note: change name to the files everytime you run the script, otherwise it will overwrite the previous results.

#Todorov
file_name_save_study_cost = "Reaching_task_NEURIPS_TOD_K_s_eta_0_0.jld"
file_path_save_study_cost = folder_location * "/" *file_name_save_study_cost
save(file_path_save_study_cost,"K_TOD", K_TOD)

file_name_save_study_cost = "Reaching_task_NEURIPS_TOD_L_s_eta_0_0.jld"
file_path_save_study_cost = folder_location * "/" *file_name_save_study_cost
save(file_path_save_study_cost,"L_TOD", L_TOD)

#GD
file_name_save_study_cost = "Reaching_task_NEURIPS_GD_K_s_eta_0_0.jld"
file_path_save_study_cost = folder_location * "/" *file_name_save_study_cost
save(file_path_save_study_cost,"K_GD", K_GD)

file_name_save_study_cost = "Reaching_task_NEURIPS_GD_L_s_eta_0_0.jld"
file_path_save_study_cost = folder_location * "/" *file_name_save_study_cost
save(file_path_save_study_cost,"L_GD", L_GD)
