#PACKAGES -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

using Optim, Plots, DelimitedFiles, LinearAlgebra, Random, StatsBase, FiniteDifferences, LaTeXStrings , EasyFit, Printf, FFTW, Pkg, Noise, Clustering, Dierckx, BSplineKit, MultivariateStats, Flux, Combinatorics, Bigsimr, DataFrames, JLD, Distributions, FFTW, Statistics

#FUNCTIONS -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Load the helper libraries used by this script:
#   - functions_TOD_and_GD_1D.jl      : functions for Todorov's and Numerical GD algorithms (1D)
#   - functions_full_GD_algorithms.jl : functions for full GD algorithms (1D and multiple dimensions)
# The working directory is reset to the directory of the script before each include.
for helper_path in (
    "../MyFunctions/functions_TOD_and_GD_1D.jl",
    "../MyFunctions/functions_full_GD_algorithms.jl",
)
    cd(@__DIR__)  # go back to the directory of the script
    include(helper_path)
end

#TASK PARAMETERS -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# --- Initial conditions ---
# Σ_x_0 and Σ_z_0 are variances (not standard deviations); one of the two must be set to zero.
# [Probably we should always have Σ_z_0 = 0 to be consistent with (Todorov, 2005) --> see
#  point (A) in the "TO BE EXPLORED" section of the file "functions_full_GD_algorithms.jl".]
Σ_x_0, Σ_z_0 = 0.0, 0.0

# Initial means of the state and of the state estimate: they should be the same.
μ_x_0 = 1.0
μ_z_0 = μ_x_0
x_z_0_mean = μ_x_0

# --- Horizon ---
# T must satisfy T > 2 to have a non-trivial problem and to make the scripts run
# (with T = 2 there is no estimation problem to solve).
T = 100

# --- Cost weights ---
# q_T is the weight of the terminal state in the cost function; it is higher than q to make
# the terminal state more important (being the task-relevant cost in most of the cases).
q, q_T, r = 1, 20, 1

# --- Model coefficients ---
# m and n are set equal to a and b respectively.
# NOTE(review): the exact role of a, b, m, n and H is defined by the included helper
# functions — confirm there.
a, b = 1.5, 1.0
m, n = a, b
H = 1

# --- Noise parameters ---
σ_ξ = 1.0
σ_ϵ_control_dep_noise = 0.5
σ_ω_add_sensory_noise = 1.0
σ_ρ_mult_sensory_noise = 0.5  # NOTE: in the pdf this is called σ_ν
σ_η_internal_noise = 0.0

#OPTIMIZATION PARAMETERS -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# --- Todorov ---
# Iteration count handed to the Todorov optimization routine.
iterations_TOD = 5000

# Initial guess for the parameters to be optimized (uniform values, one for k and one for l).
k_0 = a
l_0 = -0.01  # l has opposite sign wrt the Todorov paper, be careful {usually, the control is negative}

# --- Numerical GD (Optim.jl) ---
iterations_GD = 5000
algorithm_GD = GradientDescent()
# Options for the optimization algorithm: iteration budget, and do not store the trace.
options_GD = Optim.Options(; iterations = iterations_GD, store_trace = false)

# x_0 is the initial condition for the vector optimised in the classic GD optimisation.
# Layout: entries 1:T-1 hold the k values, entries T:2*(T-1) hold the l values.
# Here it is filled with the uniform guesses k_0 and l_0.
x_0 = vcat(k_0 .* ones(T - 1), l_0 .* ones(T - 1))

# --- MC simulations ---
# The required number of realizations depends on the noise level, but to get accurate
# estimates through the MC method it seems we should use at least ≈ 10^4 realizations.
realizations_for_averaged_cost = 10000
seed_MC = 6789

#TODOROV OPTIMIZATION -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

println("\n------- (1) Todorov's optimization --------------------------\n")

# Run the Todorov optimization starting from the uniform guesses k_0 and l_0.
k_opt_TOD, l_opt_TOD = K_L_Todorov_optimization(
    iterations_TOD, x_z_0_mean, Σ_x_0, Σ_z_0, k_0, l_0,
    a, b, H, r, q, q_T,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)

# Evaluate the resulting parameters in two ways: Monte Carlo sampling and moment propagation.
# NOTE(review): cost_MC_sampling receives (l, k) while the moment-propagation routine
# receives (k, l); the order is kept exactly as in the original calls — confirm against
# the function definitions.
MC_cost_TOD, MC_error_cost_TOD, cost_each_realization_TOD = cost_MC_sampling(
    realizations_for_averaged_cost, seed_MC,
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    l_opt_TOD, k_opt_TOD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)
expected_cost_mom_prop_TOD = expected_cost_moments_propagation_k_l_separated(
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    k_opt_TOD, l_opt_TOD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)

println("- Expected cost using moment propagation (Todorov): $expected_cost_mom_prop_TOD\n")
println("- Montecarlo - cost function (Todorov): $MC_cost_TOD  (error: $MC_error_cost_TOD)\n")

#Full GD OPTIMIZATION WITH ORTHOGONALITY PRINCIPLE -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

println("\n------- (2) Full GD optimization with orthogonality princicple --------------------------\n")

# Vector of length T-2 whose t-th entry is T-t (stored as Float64).
n_order_vec_omega_zero = Float64[T - t for t in 1:T-2]

# Iteration budgets passed to the optimizer.
iterations_full_optimization = 1000
iterations_control_optimization = 1
iterations_estimation_optimization = 1

# Start the optimization from Todorov's solution (copied into fresh Float64 vectors).
l_vec_start = zeros(T - 1)
l_vec_start .= l_opt_TOD[:]
k_vec_start = zeros(T - 1)
k_vec_start .= k_opt_TOD[:]

l_vec_optimal_full_GD, k_vec_optimal_full_GD =
    Full_GD_control_backward_optimization_with_orthogonality_principle_filter_optimization_coordinate_descend(
        n_order_vec_omega_zero,
        iterations_full_optimization,
        iterations_control_optimization,
        iterations_estimation_optimization,
        l_vec_start, k_vec_start,
        Σ_x_0, Σ_z_0, μ_z_0, T,
        q, q_T, r, a, b, H,
        σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
        σ_ρ_mult_sensory_noise, σ_η_internal_noise,
    )

# Evaluate the optimized parameters: Monte Carlo sampling and moment propagation.
# NOTE(review): cost_MC_sampling receives (l, k) while the moment-propagation routine
# receives (k, l); the order is kept exactly as in the original calls.
MC_cost_full_GD, MC_error_cost_full_GD, cost_each_realization_full_GD = cost_MC_sampling(
    realizations_for_averaged_cost, seed_MC,
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    l_vec_optimal_full_GD, k_vec_optimal_full_GD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)
expected_cost_mom_prop_full_GD = expected_cost_moments_propagation_k_l_separated(
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    k_vec_optimal_full_GD, l_vec_optimal_full_GD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)

println("- Expected cost using moment propagation (Full GD with orth princicple): $expected_cost_mom_prop_full_GD\n")
println("- Montecarlo - cost function (Full GD with orth princicple): $MC_cost_full_GD  (error: $MC_error_cost_full_GD)\n")

#Iterative GD OPTIMIZATION -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

println("\n------- (3) Iterative GD optimization --------------------------\n")

# Vector of length T-2 whose t-th entry is T-t (stored as Float64).
n_order_vec_iterative_GD = Float64[T - t for t in 1:T-2]

# Iteration budgets passed to the optimizer.
iterations_full_optimization_iterative_GD = 1000
iterations_control_optimization_iterative_GD = 1
iterations_estimation_optimization_iterative_GD = 1

# Start the optimization from Todorov's solution (copied into fresh Float64 vectors).
l_vec_start_iterative_GD = zeros(T - 1)
l_vec_start_iterative_GD .= l_opt_TOD[:]
k_vec_start_iterative_GD = zeros(T - 1)
k_vec_start_iterative_GD .= k_opt_TOD[:]

l_vec_optimal_iterative_GD, k_vec_optimal_iterative_GD =
    Iterative_GD_control_and_estimation_backward_optimization_coordinate_descend(
        n_order_vec_iterative_GD,
        iterations_full_optimization_iterative_GD,
        iterations_control_optimization_iterative_GD,
        iterations_estimation_optimization_iterative_GD,
        l_vec_start_iterative_GD, k_vec_start_iterative_GD,
        Σ_x_0, Σ_z_0, μ_z_0, T,
        q, q_T, r, a, b, H,
        σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
        σ_ρ_mult_sensory_noise, σ_η_internal_noise,
    )

# Evaluate the optimized parameters: Monte Carlo sampling and moment propagation.
# NOTE(review): cost_MC_sampling receives (l, k) while the moment-propagation routine
# receives (k, l); the order is kept exactly as in the original calls.
MC_cost_iterative_GD, MC_error_cost_iterative_GD, cost_each_realization_iterative_GD = cost_MC_sampling(
    realizations_for_averaged_cost, seed_MC,
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    l_vec_optimal_iterative_GD, k_vec_optimal_iterative_GD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)
expected_cost_mom_prop_iterative_GD = expected_cost_moments_propagation_k_l_separated(
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    k_vec_optimal_iterative_GD, l_vec_optimal_iterative_GD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)

println("- Expected cost (Iterative GD):$expected_cost_mom_prop_iterative_GD\n")
println("- Montecarlo - cost function (Iterative GD): $MC_cost_iterative_GD  (error: $MC_error_cost_iterative_GD)\n")

#Numerical GD OPTIMIZATION -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

println("\n------- (4) Numerical GD optimization --------------------------\n")

# Warm-start the optimizer from Todorov's solution.
# Layout of x_0: entries 1:T-1 hold the k values, entries T:2*(T-1) hold the l values.
x_0[1:T-1] = k_opt_TOD[:]
x_0[T:2*(T-1)] = l_opt_TOD[:]

# Cost function minimized by Optim: the expected cost obtained by moment propagation,
# as a function of the stacked parameter vector x (same layout as x_0).
function cost_function_optimization_GD(x)
    return expected_cost_moments_propagation(
        Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
        q, q_T, r, a, b, m, n, H,
        x,
        σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
        σ_ρ_mult_sensory_noise, σ_η_internal_noise,
    )
end

result_GD = optimize(cost_function_optimization_GD, x_0, algorithm_GD, options_GD)

# Read the result through Optim's documented accessor functions instead of reaching into
# the fields of the result object.
x_opt_GD = Optim.minimizer(result_GD)
expected_cost_GD_optim = Optim.minimum(result_GD)

# Split the optimized vector back into its k and l parts.
k_opt_GD = x_opt_GD[1:T-1]
l_opt_GD = x_opt_GD[T:2*(T-1)]

# Evaluate the optimized parameters: Monte Carlo sampling and moment propagation.
# NOTE(review): cost_MC_sampling receives (l, k) while the moment-propagation routine
# receives (k, l); the order is kept exactly as in the original calls.
MC_cost_GD, MC_error_cost_GD, cost_each_realization_GD = cost_MC_sampling(
    realizations_for_averaged_cost, seed_MC,
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    l_opt_GD, k_opt_GD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)
expected_cost_mom_prop_GD = expected_cost_moments_propagation_k_l_separated(
    Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T,
    q, q_T, r, a, b, m, n, H,
    k_opt_GD, l_opt_GD,
    σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise,
    σ_ρ_mult_sensory_noise, σ_η_internal_noise,
)

println("- Expected cost (GD):$expected_cost_mom_prop_GD\n")
println("- Montecarlo - cost function (GD): $MC_cost_GD  (error: $MC_error_cost_GD)\n")
