##STRUCT TYPE DEFINITION

#This struct stores the parameters used for the task implemented to show the validity of our numerical approaches to solve the control problem with internal noise.
#The example is adapted from (Todorov, 2005). The state vector is given by x(t)=[p(t), \dot{p}(t), f(t), g(t), p⋆], where p(t) is the hand position, \dot{p}(t) the hand 
#velocity, f(t) the force acting on the hand, g(t) the auxialiary variable to map contorl signal into force (through the second order low pass filter), and p⋆ is the 
#target position. Note that p⋆ is included in the state vector so that the cost function can be captured by using a quadratic form without the need of linear terms.
#Note that the goal of the task is to reach the target position in N_steps steps. We also implemented another version with a 4D state vector (instead of 5D), which should be
#more "reasonable" if we want to consider the effect of sensory dependent noise (see the comments above the function "get_model_predictions_reaching_task_application_4D_state"
#in this document).

mutable struct Model_reaching_task_application
    
    #NOISE --> All of them are scalar values and we need to build the covariances matrices. Note that these calues are all defined as standard deviations.
    σ_ξ::Float64 #additive noise acting on the state dynamics
    σ_ω_add_sensory_noise::Float64 #Additive sensory noise
    σ_ρ_mult_sensory_noise::Float64 #Multiplicative sensory noise
    σ_ϵ_mult_control_noise::Float64 #Multiplicative control dependent noise
    σ_η_internal_noise::Float64 #additive internal noise acting on the state estimate

    #DYNAMICS
    m::Float64 #The hand is modeled as a point mass, with mass m
    target_position::Float64 #Target position

    #CONTROL TO FORCE MAPPING 
    #The control signal u(t) is transofrmed into force f(t), acting on the hand, by adding a control dependent multiplicative noise and applying a second order muscle like low pass filter
    tau_1::Float64 #First time constant of the low pass filter   
    tau_2::Float64 #Second time constant of the low pass filter
    
    #TIME PARAMETERS
    Δt::Float64
    N_steps::Int64

    #COST FUNCTION
    #Note that q_t = 0 for t < t_end = N_steps*Δt 
    r::Float64 #Energy consumption term
    w_v::Float64 #Term enforcing that the movement has to stop at time t_end = N_steps*Δt [velocity has to vanish]
    w_f::Float64 #Term enforcing that the movement has to stop at time t_end = N_steps*Δt [force has to vanish]

    #INITIAL CONDITIONS --> it must be a 1x5 vector (1 x dimension of the state). Note that the initial uncertainty is set to zero: Σ_1 = 0
    x_0::Array{Float64,1}

end

##CREATE THE MODEL

#Function to create and initialize the model. MyInitialParams is a vector containing the parameters of the model in the order specified in the struct definition and in the bvariable MyParamsNames.
function CreateAndInitialize_Model_reaching_task_application(MyInitialParams)
 
    MyModel_instance = Model_reaching_task_application(MyInitialParams...)
    MyParamsNames = ["σ_ξ", "σ_ω_add_sensory_noise", "σ_ρ_mult_sensory_noise", "σ_ϵ_mult_control_noise", "σ_η_internal_noise", "m", "target_position", "tau_1", "tau_2", "Δt", "N_steps", "r", "w_v", "w_f", "x_0"]

    return MyModel_instance, MyParamsNames

end

##FUNCTIONS TO SIMULATE THE MODELS

#Note that L must be a  1 x 5 x (time_steps-1) matrix (we want a 1D control signal). The state dimensionality here is 5.
#K must be a 5 x 1 x (time_steps-1) matrix (filter gains are 5x1 matrices, if we assume a 1D sensory input). 
function get_model_predictions_reaching_task_application(num_trials, K, L, Δt, N_steps, m, target_position, tau_1, tau_2, x_0, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, σ_η_internal_noise, random_seed)
    
    #NOTE that these variables: "σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, σ_η" are all STANDARD DEVIATIONS.
    #Note that we assume senosry dependent noise acting only on the position feedback (due to eccentricity). The additive noise σ_ξ only affects the velocity 
    #to model and additive noise coming from the control to force mapping (we put it there so that it's not filtered by the second order filter)
    
    Random.seed!(random_seed) #set the seed to compare different solutions with the same realizations of the noise

    #Dimensionality
    state_dimension = 5
    observation_dimension = 1
    
    #observation_dimension = 3
    
    #Define the dynamical system
    A = zeros(state_dimension,state_dimension)
    B = zeros(state_dimension,1)
    H = zeros(observation_dimension,state_dimension)
    D = zeros(observation_dimension,state_dimension)
    
    Ω_ξ_sqrt = Diagonal([0, σ_ξ, 0, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
    Ω_η_sqrt = Diagonal([σ_η_internal_noise, 0, 0, 0, 0]) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)
    Ω_ω_sqrt = σ_ω_add_sensory_noise #Additive sensory noise (1D sensory feedback)

    A[1,:] = [1, Δt, 0, 0, 0]
    A[2,:] = [0, 1, Δt/m, 0, 0]
    A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2, 0]
    A[4,:] = [0, 0, 0, 1-Δt/tau_1, 0]
    A[5,:] = [0, 0, 0, 0, 1]

    B[:] = [0, 0, 0, Δt/tau_1, 0]
    C = σ_ϵ_mult_control_noise .* B
    H[1,:] = [1, 0, 0, 0, 0] #we're selecting only the position as observable 
    # H[2,:] = [0, 1, 0, 0, 0]
    # H[3,:] = [0, 0, 1, 0, 0]

    D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:]

    #Matrix for cost function (quadratic form)
    p_vec = [1; 0; 0; 0; -1]
    v_vec = [0; w_v; 0; 0; 0]
    f_vec = [0; 0; w_f; 0; 0]

    Q_N = p_vec*transpose(p_vec) .+ v_vec*transpose(v_vec) .+ f_vec*transpose(f_vec) 

    #Variables to give as outputs
    p_t = zeros(num_trials, N_steps) #this is the position over time per each trial
    p_hat_t = zeros(num_trials, N_steps-1) #this is position ESTIMATE over time per each trial
    control_t = zeros(num_trials, N_steps-1) #this is the control signal (which is 1D) over time per each trial
    accumulated_cost_t = zeros(num_trials, N_steps) #this is the accumulated cost over time per each trial

    p_t_average = zeros(N_steps) #average (averaged over trials - i.e. different realizations of the noise) position
    p_t_average_std = zeros(N_steps) #std of the average (averaged over trials - i.e. different realizations of the noise) position
    
    p_hat_t_average = zeros(N_steps-1) #average (averaged over trials - i.e. different realizations of the noise) position ESTIMATE 
    p_hat_t_average_std = zeros(N_steps-1) #std of the average (averaged over trials - i.e. different realizations of the noise) position ESTIMATE 
    
    control_t_average = zeros(N_steps-1) #average (averaged over trials - i.e. different realizations of the noise) control
    control_t_average_std = zeros(N_steps-1) #std of the average (averaged over trials - i.e. different realizations of the noise) control

    final_cost_average = zeros(1) #accumulated cost at the end of the trial, averaged over differen trials
    final_cost_average_std = zeros(1) #std of accumulated cost at the end of the trial, over differen trials
    
    #Initial state
    initial_state = zeros(state_dimension)
    initial_state .= x_0
    
    Threads.@threads for j in 1:num_trials
        
        #initial conditions
       
        x_state = zeros(state_dimension,1)
        x_state_estimate = zeros(state_dimension,1)
        
        x_state .= initial_state
        x_state_estimate .= x_state #we assume the intial state and state estimate to be the same at the beginning of the trial
        
        p_t[j,1] = x_state[1]
        p_hat_t[j,1] = x_state_estimate[1]

        accumulated_cost_t[j,1] = 0

        for i in 2:N_steps
            
            control_temp = dot(L[:, :, i-1],x_state_estimate) #1D control signal 
            control_t[j,i-1] = control_temp
            accumulated_cost_t[j,i] = accumulated_cost_t[j,i-1] .+ r/(N_steps-1)*control_temp^2
            
            observation_temp = H*x_state .+ Ω_ω_sqrt*randn(observation_dimension) .+ D*x_state*randn(observation_dimension) #1D sensory feedback

            x_state = A*x_state .+ B*control_temp .+ Ω_ξ_sqrt*randn(state_dimension) .+ C*control_temp .* randn() #5D state vector 
            x_state_estimate = A*x_state_estimate .+ B*control_temp .+ K[:, :, i-1].*(observation_temp .- H*x_state_estimate) .+ Ω_η_sqrt*randn(state_dimension) #5D state estimate vector 
            
            p_t[j,i] = x_state[1]
            p_hat_t[j,i-1] = x_state_estimate[1]

        end 
        
        accumulated_cost_t[j,end] = accumulated_cost_t[j,end] .+ (transpose(x_state)*Q_N*x_state)[] #Final cost; the [] are needed to convert the matrix into a scalar and add it to the accumulated cost
        
    end

    #average over trials

    p_t_average = mean(p_t, dims = 1)
    p_t_average_std = std(p_t, dims = 1)

    p_hat_t_average = mean(p_hat_t, dims = 1)
    p_hat_t_average_std = std(p_hat_t, dims = 1)

    control_t_average = mean(control_t, dims = 1)
    control_t_average_std = std(control_t, dims = 1)

    final_cost_average = mean(accumulated_cost_t[:,end])
    final_cost_average_std = std(accumulated_cost_t[:,end])/sqrt(num_trials) #error of the mean

    return final_cost_average, final_cost_average_std, p_t_average, p_t_average_std, p_hat_t_average, p_hat_t_average_std, control_t_average, control_t_average_std, p_t, p_hat_t, control_t, accumulated_cost_t

end

#Same function as before, but implemented for the case with the 4D case: the initial condition (for the position) is the target position and the goal is to approach zero 
#(it should simply be a translation of the position axis). However, the 5D and 4D case provide different expected costs, with the 4D example resulting in lower expecetd cost,
#even for r=0: UNDERSTAND why --> The difference is due to the sensory dependent noise! THIS IS IMPORTANT FOR OUR FITTING PROCEDURES! I guess in our case the choice of the 
#model (5D or 4D) could depend on the requirement regarding the fixation or not --> if the participants are free to move their eyes, then it would make sense to choose the
#4D system, since eccentricity would be determinant. In the other case, the 5D approach could be closer to the experimental condition ...  THINK ABOUT IT!!!

#Note that the 4D approach should be more reasonable, since we want to reach an object in the periphery: we start a large sensory noise to estimate such position and we conclude
#with a smaller one, if the control signal manages to get closer to the target. Note that if x_target != 0, then the sensory dependent noise should be of the form ϵ*(x_t - x_target).

function get_model_predictions_reaching_task_application_4D_state(num_trials, K, L, Δt, N_steps, m, tau_1, tau_2, x_0, Σ_1_x, Σ_1_z, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η, random_seed)
    
    #NOTE that these variables: "σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, σ_η" are all STANDARD DEVIATIONS.
    #Note that we assume senosry dependent noise acting only on the position feedback (due to eccentricity). The additive noise σ_ξ only affects the velocity 
    #to model and additive noise coming from the control to force mapping (we put it there so that it's not filtered by the second order filter)
    
    Random.seed!(random_seed) #set the seed to compare different solutions with the same realizations of the noise

    #Dimensionality
    state_dimension = 4
    observation_dimension = 1

    #observation_dimension = 3

    #Define the dynamical system
    A = zeros(state_dimension,state_dimension)
    B = zeros(state_dimension,1)
    H = zeros(observation_dimension,state_dimension)
    D = zeros(observation_dimension,state_dimension)
    
    Ω_ξ_sqrt = Diagonal([0, σ_ξ, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
    Ω_η_sqrt = sqrt.(Ω_η) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)
    Ω_ω_sqrt = σ_ω_add_sensory_noise #Additive sensory noise (1D sensory feedback)

    A[1,:] = [1, Δt, 0, 0]
    A[2,:] = [0, 1, Δt/m, 0]
    A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2]
    A[4,:] = [0, 0, 0, 1-Δt/tau_1]

    B[:] = [0, 0, 0, Δt/tau_1]
    C = σ_ϵ_mult_control_noise .* B
    H[1,:] = [1, 0, 0, 0] #we're selecting only the position as observable 
    # H[2,:] = [0, 1, 0, 0, 0]
    # H[3,:] = [0, 0, 1, 0, 0]

    D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:]

    #Matrix for cost function (quadratic form)
    p_vec = [1; 0; 0; 0]
    v_vec = [0; w_v; 0; 0]
    f_vec = [0; 0; w_f; 0]

    Q_N = p_vec*transpose(p_vec) .+ v_vec*transpose(v_vec) .+ f_vec*transpose(f_vec) 
    
    #Intial state 
    initial_state = zeros(state_dimension)
    initial_state .= x_0

    #Variables to give as outputs
    p_t = zeros(num_trials, N_steps) #this is the position over time per each trial
    p_hat_t = zeros(num_trials, N_steps) #this is position ESTIMATE over time per each trial
    control_t = zeros(num_trials, N_steps-1) #this is the control signal (which is 1D) over time per each trial

    accumulated_cost_t = zeros(num_trials, N_steps) #this is the accumulated cost over time per each trial

    p_t_average = zeros(N_steps) #average (averaged over trials - i.e. different realizations of the noise) position
    p_t_average_std = zeros(N_steps) #std of the average (averaged over trials - i.e. different realizations of the noise) position
   
    p_hat_t_average = zeros(N_steps) #average (averaged over trials - i.e. different realizations of the noise) position ESTIMATE 
    p_hat_t_average_std = zeros(N_steps) #std of the average (averaged over trials - i.e. different realizations of the noise) position ESTIMATE 

    control_t_average = zeros(N_steps-1) #average (averaged over trials - i.e. different realizations of the noise) control
    control_t_average_std = zeros(N_steps-1) #std of the average (averaged over trials - i.e. different realizations of the noise) control

    final_cost_average = zeros(1) #accumulated cost at the end of the trial, averaged over differen trials
    final_cost_average_std = zeros(1) #std of accumulated cost at the end of the trial, over differen trials
    
    Threads.@threads for j in 1:num_trials
        
        #initial conditions
       
        x_state = zeros(state_dimension,1)
        x_state_estimate = zeros(state_dimension,1)
        
        initial_state_noisy_terms = sqrt.(diag(Σ_1_x)) .* randn(state_dimension)
        initial_state_estimate_noisy_terms = sqrt.(diag(Σ_1_z)) .* randn(state_dimension)

        x_state .= initial_state .+ initial_state_noisy_terms
        x_state_estimate .= initial_state .+ initial_state_estimate_noisy_terms
        
        p_t[j,1] = x_state[1]
        p_hat_t[j,1] = x_state_estimate[1]

        accumulated_cost_t[j,1] = 0

        for i in 2:N_steps
            
            control_temp = dot(L[:, :, i-1],x_state_estimate) #1D control signal 
            control_t[j,i-1] = control_temp
            accumulated_cost_t[j,i] = accumulated_cost_t[j,i-1] .+ r/(N_steps-1)*control_temp^2
            
            observation_temp = H*x_state .+ Ω_ω_sqrt*randn(observation_dimension) .+ D*x_state*randn(observation_dimension) #1D sensory feedback

            x_state = A*x_state .+ B*control_temp .+ Ω_ξ_sqrt*randn(state_dimension) .+ C*control_temp .* randn() #4D state vector 
            x_state_estimate = A*x_state_estimate .+ B*control_temp .+ K[:, :, i-1].*(observation_temp .- H*x_state_estimate) .+ Ω_η_sqrt*randn(state_dimension) #4D state estimate vector 
            
            p_t[j,i] = x_state[1]
            p_hat_t[j,i] = x_state_estimate[1]

        end 
        
        accumulated_cost_t[j,end] = accumulated_cost_t[j,end] .+ (transpose(x_state)*Q_N*x_state)[] #Final cost; the [] are needed to convert the matrix into a scalar and add it to the accumulated cost
        
    end

    #average over trials

    p_t_average = mean(p_t, dims = 1)
    p_t_average_std = std(p_t, dims = 1)

    p_hat_t_average = mean(p_hat_t, dims = 1)
    p_hat_t_average_std = std(p_hat_t, dims = 1)

    control_t_average = mean(control_t, dims = 1)
    control_t_average_std = std(control_t, dims = 1)

    final_cost_average = mean(accumulated_cost_t[:,end])
    final_cost_average_std = std(accumulated_cost_t[:,end])/sqrt(num_trials) #error of the mean

    return final_cost_average, final_cost_average_std, p_t_average, p_t_average_std, p_hat_t_average, p_hat_t_average_std, control_t_average, control_t_average_std, p_t, p_hat_t, control_t, accumulated_cost_t

end

#Same function as before, extracting ONLY the speed profiles (over time) predicted by the model(s). We give here as output the velocity (average, std and velocity per each realization), and the same for its inferred internal variable (p_dot_hat).
function get_model_speed_profiles_reaching_task_application_4D_state(num_trials, K, L, Δt, N_steps, m, tau_1, tau_2, x_0, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, σ_η_internal_noise, random_seed)
    
    #NOTE that these variables: "σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, σ_η" are all STANDARD DEVIATIONS.
    #Note that we assume senosry dependent noise acting only on the position feedback (due to eccentricity). The additive noise σ_ξ only affects the velocity 
    #to model and additive noise coming from the control to force mapping (we put it there so that it's not filtered by the second order filter)
    
    Random.seed!(random_seed) #set the seed to compare different solutions with the same realizations of the noise

    #Dimensionality
    state_dimension = 4
    observation_dimension = 1

    #observation_dimension = 3

    #Define the dynamical system
    A = zeros(state_dimension,state_dimension)
    B = zeros(state_dimension,1)
    H = zeros(observation_dimension,state_dimension)
    D = zeros(observation_dimension,state_dimension)
    
    Ω_ξ_sqrt = Diagonal([0, σ_ξ, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
    Ω_η_sqrt = Diagonal([σ_η_internal_noise, 0, 0, 0]) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)
    Ω_ω_sqrt = σ_ω_add_sensory_noise #Additive sensory noise (1D sensory feedback)

    A[1,:] = [1, Δt, 0, 0]
    A[2,:] = [0, 1, Δt/m, 0]
    A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2]
    A[4,:] = [0, 0, 0, 1-Δt/tau_1]

    B[:] = [0, 0, 0, Δt/tau_1]
    C = σ_ϵ_mult_control_noise .* B
    H[1,:] = [1, 0, 0, 0] #we're selecting only the position as observable 
    # H[2,:] = [0, 1, 0, 0, 0]
    # H[3,:] = [0, 0, 1, 0, 0]

    D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:] 
    
    #Intial state 
    initial_state = zeros(state_dimension)
    initial_state .= x_0

    #Variables to give as outputs
    p_dot_t = zeros(num_trials, N_steps) #this is the velocity over time per each trial
    p_dot_hat_t = zeros(num_trials, N_steps) #this is velocity ESTIMATE over time per each trial
  
    p_dot_t_average = zeros(N_steps) #average (averaged over trials - i.e. different realizations of the noise) velocity
    p_dot_t_average_std = zeros(N_steps) #std of the average (averaged over trials - i.e. different realizations of the noise) velocity
    
    p_dot_hat_t_average = zeros(N_steps) #average (averaged over trials - i.e. different realizations of the noise) velocity ESTIMATE 
    p_dot_hat_t_average_std = zeros(N_steps) #std of the average (averaged over trials - i.e. different realizations of the noise) velocity ESTIMATE 
    
    Threads.@threads for j in 1:num_trials
        
        #initial conditions
       
        x_state = zeros(state_dimension,1)
        x_state_estimate = zeros(state_dimension,1)
        
        x_state .= initial_state
        x_state_estimate .= x_state #we assume the intial state and state estimate to be the same at the beginning of the trial
        
        p_dot_t[j,1] = x_state[2]
        p_dot_hat_t[j,1] = x_state_estimate[2]

        for i in 2:N_steps
            
            control_temp = dot(L[:, :, i-1],x_state_estimate) #1D control signal 
            observation_temp = H*x_state .+ Ω_ω_sqrt*randn(observation_dimension) .+ D*x_state*randn(observation_dimension) #1D sensory feedback

            x_state = A*x_state .+ B*control_temp .+ Ω_ξ_sqrt*randn(state_dimension) .+ C*control_temp .* randn() #4D state vector 
            x_state_estimate = A*x_state_estimate .+ B*control_temp .+ K[:, :, i-1].*(observation_temp .- H*x_state_estimate) .+ Ω_η_sqrt*randn(state_dimension) #4D state estimate vector 
            
            p_dot_t[j,i] = x_state[2]
            p_dot_hat_t[j,i] = x_state_estimate[2]

        end 
                
    end

    #average over trials

    p_dot_t_average = mean(p_dot_t, dims = 1)
    p_dot_t_average_std = std(p_dot_t, dims = 1)

    p_dot_hat_t_average = mean(p_dot_hat_t, dims = 1)
    p_dot_hat_t_average_std = std(p_dot_hat_t, dims = 1)

    return p_dot_t_average, p_dot_t_average_std, p_dot_hat_t_average, p_dot_hat_t_average_std, p_dot_t, p_dot_hat_t

end

## TODOROV OPTIMIZATION :  Note that the following function gives as output -L_optim, so that we don't need to change its sign. Indeed, Todorov uses the following notation:
# u_t = -L_t*x_hat_t, while we use u_t = L_t*x_hat_t
    
function Todorov_optimization_reaching_task_application(Δt, N_steps, m, target_position, tau_1, tau_2, x_0, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, σ_η_internal_noise, N_iter = 500)
    
    #Dimensionality
    state_dimension = 5
    observation_dimension = 1

    #observation_dimension = 3

    #Define the dynamical system
    A = zeros(state_dimension,state_dimension)
    B = zeros(state_dimension,1)
    H = zeros(observation_dimension,state_dimension)
    D = zeros(observation_dimension,state_dimension)
    
    Ω_ξ = Diagonal([0, σ_ξ^2, 0, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
    Ω_η = Diagonal([σ_η_internal_noise^2, 0, 0, 0, 0]) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)
    Ω_ω = σ_ω_add_sensory_noise^2 #Additive sensory noise (1D sensory feedback)

    A[1,:] = [1, Δt, 0, 0, 0]
    A[2,:] = [0, 1, Δt/m, 0, 0]
    A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2, 0]
    A[4,:] = [0, 0, 0, 1-Δt/tau_1, 0]
    A[5,:] = [0, 0, 0, 0, 1]

    B[:] = [0, 0, 0, Δt/tau_1, 0]
    C = σ_ϵ_mult_control_noise .* B
    H[1,:] = [1, 0, 0, 0, 0] #we're selecting only the position as observable 
    # H[2,:] = [0, 1, 0, 0, 0]
    # H[3,:] = [0, 0, 1, 0, 0]

    D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:]

    #Matrix for cost function (quadratic form)
    p_vec = [1; 0; 0; 0; -1]
    v_vec = [0; w_v; 0; 0; 0]
    f_vec = [0; 0; w_f; 0; 0]

    Q_N = p_vec*transpose(p_vec) .+ v_vec*transpose(v_vec) .+ f_vec*transpose(f_vec) 
    
    #Intial state 
    initial_state = zeros(state_dimension)
    initial_state .= x_0

    #OPTIMIZATION
    
    L = zeros(1,state_dimension,(N_steps-1))
    K = zeros(state_dimension,observation_dimension,(N_steps-1))

    for i in 1:N_iter

        #OPTIMAL CONTROLLER --> backwards through time

        #Initial (i.e. final) conditions
        Sx_tp1 = Q_N      
        Se_tp1 = zeros(state_dimension,state_dimension)

        #Note that for the given system the last control vector is always zero, because transpose(B)*Q_N = 0 [the last cost function doesn't include "interactions" with the control part of the dynamics].
        #We then update Se_tp1 and S_xtp1 and start the for loop from N_steps - 2 instead of N_steps - 1

        idx = N_steps-1
        cSx_tp1 = transpose(A)*Sx_tp1*(A .- B*L[:,:,idx]) .+ transpose(D)*transpose(K[:,:,idx])*Se_tp1*K[:,:,idx]*D
        Se_tp1 = transpose(A)*Sx_tp1*B*L[:,:,idx] .+ transpose(A .- K[:,:,idx]*H)*Se_tp1*(A .- K[:,:,idx]*H)
        Sx_tp1 = cSx_tp1

        for idx in (N_steps-2):-1:1

            L[:,:,idx] = inv(r/(N_steps-1) .+ transpose(B)*Sx_tp1*B .+ transpose(C)*(Sx_tp1 .+ Se_tp1)*C)*transpose(B)*Sx_tp1*A       
            cSx_tp1 = transpose(A)*Sx_tp1*(A .- B*L[:,:,idx]) .+ transpose(D)*transpose(K[:,:,idx])*Se_tp1*K[:,:,idx]*D
            Se_tp1 = transpose(A)*Sx_tp1*B*L[:,:,idx] .+ transpose(A .- K[:,:,idx]*H)*Se_tp1*(A .- K[:,:,idx]*H)
            Sx_tp1 = cSx_tp1

        end

        #OPTIMAL FILTER --> forward in time
        
        #Initial conditions
        Σe_t = zeros(state_dimension,state_dimension)
        Σxe_t = zeros(state_dimension,state_dimension)
        Σx_t = initial_state*transpose(initial_state)

        #first step for Σe_t, Σxe_t, Σx_t: the first K is zero, if we assume zero initial uncertainty on the state vector (Σ_1 = 0)
        idx = 1
        cΣe_t = Ω_ξ .+ Ω_η .+ (A .- K[:,:,idx]*H)*Σe_t*transpose(A) .+ C*L[:,:,idx]*Σx_t*transpose(L[:,:,idx])*transpose(C)
        Σx_t = Ω_η .+ K[:,:,idx]*H*Σe_t*transpose(A) .+ (A .- B*L[:,:,idx])*Σx_t*transpose((A .- B*L[:,:,idx])) .+ (A .- B*L[:,:,idx])*Σxe_t*transpose(H)*transpose(K[:,:,idx]) .+ K[:,:,idx]*H*transpose(Σxe_t)*transpose(A .- B*L[:,:,idx])
        Σe_t = cΣe_t
        Σxe_t = (A .- B*L[:,:,idx])*Σxe_t*transpose(A .- K[:,:,idx]*H) .- Ω_η

        for idx in 2:(N_steps-1)

            K[:,:,idx] = A*Σe_t*transpose(H)*inv(H*Σe_t*transpose(H) .+ Ω_ω .+ D*(Σe_t .+ Σx_t .+ Σxe_t .+ transpose(Σxe_t))*transpose(D))
            cΣe_t = Ω_ξ .+ Ω_η .+ (A .- K[:,:,idx]*H)*Σe_t*transpose(A) .+ C*L[:,:,idx]*Σx_t*transpose(L[:,:,idx])*transpose(C)
            Σx_t = Ω_η .+ K[:,:,idx]*H*Σe_t*transpose(A) .+ (A .- B*L[:,:,idx])*Σx_t*transpose((A .- B*L[:,:,idx])) .+ (A .- B*L[:,:,idx])*Σxe_t*transpose(H)*transpose(K[:,:,idx]) .+ K[:,:,idx]*H*transpose(Σxe_t)*transpose(A .- B*L[:,:,idx])
            Σe_t = cΣe_t
            Σxe_t = (A .- B*L[:,:,idx])*Σxe_t*transpose(A .- K[:,:,idx]*H) .- Ω_η

        end

    end

    #Expected cost, once control and estimator have been optimized

    Sx_tp1 = Q_N      
    Se_tp1 = zeros(state_dimension,state_dimension)
    s_small_t = 0

    for idx in (N_steps-1):-1:1

        cSx_tp1 = transpose(A)*Sx_tp1*(A .- B*L[:,:,idx]) .+ transpose(D)*transpose(K[:,:,idx])*Se_tp1*K[:,:,idx]*D
        cSe_tp1 = transpose(A)*Sx_tp1*B*L[:,:,idx] .+ transpose(A .- K[:,:,idx]*H)*Se_tp1*(A .- K[:,:,idx]*H)

        s_small_t = tr(Sx_tp1*Ω_ξ .+ Se_tp1*(Ω_ξ .+ Ω_η .+ K[:,:,idx]*Ω_ω*transpose(K[:,:,idx]))) + s_small_t

        Sx_tp1 = cSx_tp1
        Se_tp1 = cSe_tp1

    end

    expected_cost = s_small_t + transpose(initial_state)*Sx_tp1*initial_state #Note that we use Σ_1 = 0

    return .-L,K, expected_cost

end

#Same function, but for the system with the 4D state vector.
#Note that Σ_1_x is the initial covariance of the state vector, while Σ_1_z is the initial covariance for the state estimate vector.
function Todorov_optimization_reaching_task_application_4D_state(Δt, N_steps, m, tau_1, tau_2, x_0, Σ_1_x, Σ_1_z, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η, N_iter = 500)
    
    #Dimensionality
    state_dimension = 4
    observation_dimension = 1

    #observation_dimension = 3

    #Define the dynamical system
    A = zeros(state_dimension,state_dimension)
    B = zeros(state_dimension,1)
    H = zeros(observation_dimension,state_dimension)
    D = zeros(observation_dimension,state_dimension)
    
    Ω_ξ = Diagonal([0, σ_ξ^2, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
    #Ω_η = Diagonal([σ_η_internal_noise^2, σ_η_internal_noise^2, σ_η_internal_noise^2, σ_η_internal_noise^2]) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)
    Ω_ω = σ_ω_add_sensory_noise^2 #Additive sensory noise (1D sensory feedback)

    A[1,:] = [1, Δt, 0, 0]
    A[2,:] = [0, 1, Δt/m, 0]
    A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2]
    A[4,:] = [0, 0, 0, 1-Δt/tau_1]

    B[:] = [0, 0, 0, Δt/tau_1]
    C = σ_ϵ_mult_control_noise .* B
    H[1,:] = [1, 0, 0, 0] #we're selecting only the position as observable 
    # H[2,:] = [0, 1, 0, 0, 0]
    # H[3,:] = [0, 0, 1, 0, 0]

    D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:]

    #Matrix for cost function (quadratic form)
    p_vec = [1; 0; 0; 0]
    v_vec = [0; w_v; 0; 0]
    f_vec = [0; 0; w_f; 0]

    Q_N = p_vec*transpose(p_vec) .+ v_vec*transpose(v_vec) .+ f_vec*transpose(f_vec) 
    
    #Intial state 
    initial_state = zeros(state_dimension)
    initial_state .= x_0

    Σe_t = zeros(state_dimension,state_dimension)
    Σxe_t = zeros(state_dimension,state_dimension)
    Σx_t = zeros(state_dimension,state_dimension)

    Σe_t_start = zeros(state_dimension,state_dimension)
    Σxe_t_start = zeros(state_dimension,state_dimension)
    Σx_t_start = zeros(state_dimension,state_dimension)

    Σe_t_start[:,:] .= Σ_1_x[:,:] .+ Σ_1_z[:,:]
    Σxe_t_start[:,:] .= Σ_1_z
    Σx_t_start[:,:] .= Σ_1_z .+ initial_state*transpose(initial_state)

    #OPTIMIZATION
    
    L = zeros(1,state_dimension,(N_steps-1))
    K = zeros(state_dimension,observation_dimension,(N_steps-1))

    for i in 1:N_iter

        #OPTIMAL CONTROLLER --> backwards through time

        #Initial (i.e. final) conditions
        Sx_tp1 = Q_N      
        Se_tp1 = zeros(state_dimension,state_dimension)

        #Note that for the given system the last control vector is always zero, because transpose(B)*Q_N = 0 [the last cost function doesn't include "interactions" with the control part of the dynamics].
        #We then update Se_tp1 and S_xtp1 and start the for loop from N_steps - 2 instead of N_steps - 1

        idx = N_steps-1
        cSx_tp1 = transpose(A)*Sx_tp1*(A .- B*L[:,:,idx]) .+ transpose(D)*transpose(K[:,:,idx])*Se_tp1*K[:,:,idx]*D
        Se_tp1 = transpose(A)*Sx_tp1*B*L[:,:,idx] .+ transpose(A .- K[:,:,idx]*H)*Se_tp1*(A .- K[:,:,idx]*H)
        Sx_tp1 = cSx_tp1

        for idx in (N_steps-2):-1:1

            L[:,:,idx] = inv(r/(N_steps-1) .+ transpose(B)*Sx_tp1*B .+ transpose(C)*(Sx_tp1 .+ Se_tp1)*C)*transpose(B)*Sx_tp1*A       
            cSx_tp1 = transpose(A)*Sx_tp1*(A .- B*L[:,:,idx]) .+ transpose(D)*transpose(K[:,:,idx])*Se_tp1*K[:,:,idx]*D
            Se_tp1 = transpose(A)*Sx_tp1*B*L[:,:,idx] .+ transpose(A .- K[:,:,idx]*H)*Se_tp1*(A .- K[:,:,idx]*H)
            Sx_tp1 = cSx_tp1

        end

        #OPTIMAL FILTER --> forward in time
        
        #Initial conditions --> given by function arguments 
        Σe_t[:,:] .= Σe_t_start[:,:]
        Σxe_t[:,:] .= Σxe_t_start[:,:]
        Σx_t[:,:] .= Σx_t_start[:,:]

        #first step for Σe_t, Σxe_t, Σx_t: the first K is zero, if we assume zero initial uncertainty on the state vector (Σ_1 = 0)
        idx = 1
        cΣe_t = Ω_ξ .+ Ω_η .+ (A .- K[:,:,idx]*H)*Σe_t*transpose(A) .+ C*L[:,:,idx]*Σx_t*transpose(L[:,:,idx])*transpose(C)
        Σx_t = Ω_η .+ K[:,:,idx]*H*Σe_t*transpose(A) .+ (A .- B*L[:,:,idx])*Σx_t*transpose((A .- B*L[:,:,idx])) .+ (A .- B*L[:,:,idx])*Σxe_t*transpose(H)*transpose(K[:,:,idx]) .+ K[:,:,idx]*H*transpose(Σxe_t)*transpose(A .- B*L[:,:,idx])
        Σe_t = cΣe_t
        Σxe_t = (A .- B*L[:,:,idx])*Σxe_t*transpose(A .- K[:,:,idx]*H) .- Ω_η

        for idx in 2:(N_steps-1)

            K[:,:,idx] = A*Σe_t*transpose(H)*inv(H*Σe_t*transpose(H) .+ Ω_ω .+ D*(Σe_t .+ Σx_t .+ Σxe_t .+ transpose(Σxe_t))*transpose(D))
            cΣe_t = Ω_ξ .+ Ω_η .+ (A .- K[:,:,idx]*H)*Σe_t*transpose(A) .+ C*L[:,:,idx]*Σx_t*transpose(L[:,:,idx])*transpose(C)
            Σx_t = Ω_η .+ K[:,:,idx]*H*Σe_t*transpose(A) .+ (A .- B*L[:,:,idx])*Σx_t*transpose((A .- B*L[:,:,idx])) .+ (A .- B*L[:,:,idx])*Σxe_t*transpose(H)*transpose(K[:,:,idx]) .+ K[:,:,idx]*H*transpose(Σxe_t)*transpose(A .- B*L[:,:,idx])
            Σe_t = cΣe_t
            Σxe_t = (A .- B*L[:,:,idx])*Σxe_t*transpose(A .- K[:,:,idx]*H) .- Ω_η

        end

    end

    #Expected cost, once control and estimator have been optimized

    Sx_tp1 = Q_N      
    Se_tp1 = zeros(state_dimension,state_dimension)
    s_small_t = 0

    for idx in (N_steps-1):-1:1

        cSx_tp1 = transpose(A)*Sx_tp1*(A .- B*L[:,:,idx]) .+ transpose(D)*transpose(K[:,:,idx])*Se_tp1*K[:,:,idx]*D
        cSe_tp1 = transpose(A)*Sx_tp1*B*L[:,:,idx] .+ transpose(A .- K[:,:,idx]*H)*Se_tp1*(A .- K[:,:,idx]*H)

        s_small_t = tr(Sx_tp1*Ω_ξ .+ Se_tp1*(Ω_ξ .+ Ω_η .+ K[:,:,idx]*Ω_ω*transpose(K[:,:,idx]))) + s_small_t

        Sx_tp1 = cSx_tp1
        Se_tp1 = cSe_tp1

    end

    #expected_cost = s_small_t + transpose(initial_state)*Sx_tp1*initial_state #Note that we use Σ_1 = 0
    expected_cost = s_small_t + transpose(initial_state)*Sx_tp1*initial_state + tr((Sx_tp1 .+ Se_tp1)*Σ_1_x) + tr(Se_tp1*Σ_1_z)

    return .-L,K, expected_cost

end

#Function to compute the expected value using Todorov's approach
function Todorov_expectation_reaching_task_application_4D_state(K, L, Δt, N_steps, m, tau_1, tau_2, x_0, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η)
    
    #Dimensionality
    state_dimension = 4
    observation_dimension = 1

    #observation_dimension = 3

    #Define the dynamical system
    A = zeros(state_dimension,state_dimension)
    B = zeros(state_dimension,1)
    H = zeros(observation_dimension,state_dimension)
    D = zeros(observation_dimension,state_dimension)
    
    Ω_ξ = Diagonal([0, σ_ξ^2, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
    #Ω_η = Diagonal([σ_η_internal_noise^2, 0, 0, 0]) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)
    Ω_ω = σ_ω_add_sensory_noise^2 #Additive sensory noise (1D sensory feedback)

    A[1,:] = [1, Δt, 0, 0]
    A[2,:] = [0, 1, Δt/m, 0]
    A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2]
    A[4,:] = [0, 0, 0, 1-Δt/tau_1]

    B[:] = [0, 0, 0, Δt/tau_1]
    C = σ_ϵ_mult_control_noise .* B
    H[1,:] = [1, 0, 0, 0] #we're selecting only the position as observable 
    # H[2,:] = [0, 1, 0, 0, 0]
    # H[3,:] = [0, 0, 1, 0, 0]

    D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:]

    #Matrix for cost function (quadratic form)
    p_vec = [1; 0; 0; 0]
    v_vec = [0; w_v; 0; 0]
    f_vec = [0; 0; w_f; 0]

    Q_N = p_vec*transpose(p_vec) .+ v_vec*transpose(v_vec) .+ f_vec*transpose(f_vec) 
    
    #Intial state 
    initial_state = zeros(state_dimension)
    initial_state .= x_0 

    #Expected cost, once control and estimator have been optimized

    Sx_tp1 = Q_N      
    Se_tp1 = zeros(state_dimension,state_dimension)
    s_small_t = 0

    for idx in (N_steps-1):-1:1

        cSx_tp1 = transpose(A)*Sx_tp1*(A .- B*L[:,:,idx]) .+ transpose(D)*transpose(K[:,:,idx])*Se_tp1*K[:,:,idx]*D
        cSe_tp1 = transpose(A)*Sx_tp1*B*L[:,:,idx] .+ transpose(A .- K[:,:,idx]*H)*Se_tp1*(A .- K[:,:,idx]*H)

        s_small_t = tr(Sx_tp1*Ω_ξ .+ Se_tp1*(Ω_ξ .+ Ω_η .+ K[:,:,idx]*Ω_ω*transpose(K[:,:,idx]))) + s_small_t

        Sx_tp1 = cSx_tp1
        Se_tp1 = cSe_tp1

    end

    expected_cost = s_small_t + transpose(initial_state)*Sx_tp1*initial_state #Note that we use Σ_1 = 0

    return expected_cost

end

##GD OPTIMIZATION

#The function computes the value expected to accumulate over a trial. Use it to make predictions; to run the GD optimization use the next function.
#Note that here we allow for initial variance of state and state estimate
function moments_propagation_reaching_task_application_4D_state(K, L, Δt, N_steps, m, tau_1, tau_2, x_0, Σ_1_x, Σ_1_z, r, w_v, w_f, σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, Ω_η)
    
    #NOTE that these variables: "σ_ξ, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_ϵ_mult_control_noise, σ_η" are all STANDARD DEVIATIONS.
    #Note that we assume senosry dependent noise acting only on the position feedback (due to eccentricity). The additive noise σ_ξ only affects the velocity 
    #to model and additive noise coming from the control to force mapping (we put it there so that it's not filtered by the second order filter)
    
    #Dimensionality
    state_dimension = 4
    observation_dimension = 1

    #observation_dimension = 3

    #Define the dynamical system
    A = zeros(state_dimension,state_dimension)
    B = zeros(state_dimension,1)
    H = zeros(observation_dimension,state_dimension)
    D = zeros(observation_dimension,state_dimension)
    
    Ω_ξ = Diagonal([0, σ_ξ^2, 0, 0]) #we consider the sqrt of the covariance matrix, so we don't need to call the function sqrt at each time step iteration
    #Ω_η = Diagonal([σ_η_internal_noise^2, 0, 0, 0]) #Internal noise is acting only on the osberved variables (position, velocity: visual feedbacks; force: proprioception)
    Ω_ω = σ_ω_add_sensory_noise^2 #Additive sensory noise (1D sensory feedback)

    A[1,:] = [1, Δt, 0, 0]
    A[2,:] = [0, 1, Δt/m, 0]
    A[3,:] = [0, 0, 1-Δt/tau_2, Δt/tau_2]
    A[4,:] = [0, 0, 0, 1-Δt/tau_1]

    B[:] = [0, 0, 0, Δt/tau_1]
    C = σ_ϵ_mult_control_noise .* B
    H[1,:] = [1, 0, 0, 0] #we're selecting only the position as observable 
    # H[2,:] = [0, 1, 0, 0, 0]
    # H[3,:] = [0, 0, 1, 0, 0]

    D[1,:] = σ_ρ_mult_sensory_noise .* H[1,:]

    #Matrix for cost function (quadratic form)
    p_vec = [1; 0; 0; 0]
    v_vec = [0; w_v; 0; 0]
    f_vec = [0; 0; w_f; 0]

    Q_N = p_vec*transpose(p_vec) .+ v_vec*transpose(v_vec) .+ f_vec*transpose(f_vec) 
    
    #Initial Conditions. Note that we update <x> (E_x), <x_hat> (E_x_hat), <x*transpose(x)> (E_xx), <x_hat*transpose(x_hat)> (E_x_hat_x_hat) and
    # <x*transpose(x_hat)> (E_x_x_hat), to get the expected accumulated cost (that requires the knowledge of the mean and covariances of x and x_hat)
    E_x = zeros(state_dimension)
    E_x_hat = zeros(state_dimension)
    
    E_x .= x_0
    E_x_hat .= x_0

    E_xx = zeros(state_dimension, state_dimension)
    E_x_hat_x_hat = zeros(state_dimension, state_dimension)
    E_x_x_hat = zeros(state_dimension, state_dimension)

    E_xx = E_x*transpose(E_x) .+ Σ_1_x
    E_x_hat_x_hat = E_x*transpose(E_x) .+ Σ_1_z
    E_x_x_hat = E_x*transpose(E_x)

    cost = 0
    cov_x_hat = 0

    for i in 2:N_steps

        cost += (r/(N_steps-1).*(L[:, :, i-1]*E_x_hat).^2)[] .+ (r/(N_steps-1).*(L[:, :, i-1]*cov_x_hat*transpose(L[:, :, i-1])))[]
        
        E_x_new = A*E_x .+ B*(L[:, :, i-1]*E_x_hat)
        E_x_hat_new = A*E_x_hat .+ B*(L[:, :, i-1]*E_x_hat) .+ K[:, :, i-1]*H*(E_x .- E_x_hat)

        E_xx_new = A*E_xx*transpose(A) .+ B*L[:, :, i-1]*E_x_hat_x_hat*transpose(L[:, :, i-1])*transpose(B) .+ C*L[:, :, i-1]*E_x_hat_x_hat*transpose(L[:, :, i-1])*transpose(C) .+ A*E_x_x_hat*transpose(L[:, :, i-1])*transpose(B) .+ transpose(A*E_x_x_hat*transpose(L[:, :, i-1])*transpose(B)) .+ Ω_ξ    
        E_x_hat_x_hat_new = K[:, :, i-1]*H*E_xx*transpose(H)*transpose(K[:, :, i-1]) .+ K[:, :, i-1]*D*E_xx*transpose(D)*transpose(K[:, :, i-1]) .+ A*E_x_hat_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ B*L[:, :, i-1]*E_x_hat_x_hat*(transpose(L[:, :, i-1])*transpose(B) .+ transpose(A) .- transpose(H)*transpose(K[:, :, i-1])) .+ K[:, :, i-1]*H*E_x_hat_x_hat*(transpose(H)*transpose(K[:, :, i-1]) .- transpose(A) .- transpose(L[:, :, i-1])*transpose(B)) .+ K[:, :, i-1]*H*E_x_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ (B*L[:, :, i-1] .- K[:, :, i-1]*H .+ A)*transpose(E_x_x_hat)*transpose(H)*transpose(K[:, :, i-1]) .+ K[:, :, i-1]*Ω_ω*transpose(K[:, :, i-1]) .+ Ω_η   
        
        E_x_x_hat_new = A*E_xx*transpose(H)*transpose(K[:, :, i-1]) .+ B*L[:, :, i-1]*E_x_hat_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ A*E_x_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ B*L[:, :, i-1]*transpose(E_x_x_hat)*transpose(H)*transpose(K[:, :, i-1])

        cov_x_hat = E_x_hat_x_hat_new .- E_x_hat_new*transpose(E_x_hat_new)

        E_x .= E_x_new
        E_x_hat .= E_x_hat_new
        E_xx .= E_xx_new
        E_x_hat_x_hat .= E_x_hat_x_hat_new
        E_x_x_hat .= E_x_x_hat_new

    end

    cov_x = E_xx .- E_x*transpose(E_x)
    cost += transpose(E_x)*Q_N*E_x .+ tr(Q_N*cov_x) 
    
    return cost

end

#Same function as before, but slighly optimized to run the GD minimization
function moments_propagation_reaching_task_application_4D_state_GD_optimization(x, A, B, C, D, H, Ω_ξ, Ω_η, Ω_ω, Q_N, r, x_state_0, Σ_1_x, Σ_1_z, N_steps)

    #Dimensionality
    state_dimension = 4
    observation_dimension = 1
    
    #x = Vector(1:state_dimension*(2*N_steps - 2)) this is the vector to be optimized
    K = reshape(x[1:state_dimension*(N_steps - 1)], state_dimension, observation_dimension, N_steps - 1)
    L = reshape(x[state_dimension*(N_steps - 1)+1:end], 1, state_dimension, N_steps - 1)

    #Initial Conditions. Note that we update <x> (E_x), <x_hat> (E_x_hat), <x*transpose(x)> (E_xx), <x_hat*transpose(x_hat)> (E_x_hat_x_hat) and
    # <x*transpose(x_hat)> (E_x_x_hat), to get the expected accumulated cost (that requires the knowledge of the mean and covariances of x and x_hat)
    E_x = zeros(state_dimension)
    E_x_hat = zeros(state_dimension)
    
    E_x .= x_state_0
    E_x_hat .= x_state_0

    E_xx = zeros(state_dimension, state_dimension)
    E_x_hat_x_hat = zeros(state_dimension, state_dimension)
    E_x_x_hat = zeros(state_dimension, state_dimension)

    E_xx = E_x*transpose(E_x) .+ Σ_1_x
    E_x_hat_x_hat = E_x*transpose(E_x) .+ Σ_1_z
    E_x_x_hat = E_x*transpose(E_x)

    cost = 0
    cov_x_hat = 0

    for i in 2:N_steps

        cost += (r/(N_steps-1).*(L[:, :, i-1]*E_x_hat).^2)[] .+ (r/(N_steps-1).*(L[:, :, i-1]*cov_x_hat*transpose(L[:, :, i-1])))[]
        
        E_x_new = A*E_x .+ B*(L[:, :, i-1]*E_x_hat)
        E_x_hat_new = A*E_x_hat .+ B*(L[:, :, i-1]*E_x_hat) .+ K[:, :, i-1]*H*(E_x .- E_x_hat)

        E_xx_new = A*E_xx*transpose(A) .+ B*L[:, :, i-1]*E_x_hat_x_hat*transpose(L[:, :, i-1])*transpose(B) .+ C*L[:, :, i-1]*E_x_hat_x_hat*transpose(L[:, :, i-1])*transpose(C) .+ A*E_x_x_hat*transpose(L[:, :, i-1])*transpose(B) .+ transpose(A*E_x_x_hat*transpose(L[:, :, i-1])*transpose(B)) .+ Ω_ξ    
        E_x_hat_x_hat_new = K[:, :, i-1]*H*E_xx*transpose(H)*transpose(K[:, :, i-1]) .+ K[:, :, i-1]*D*E_xx*transpose(D)*transpose(K[:, :, i-1]) .+ A*E_x_hat_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ B*L[:, :, i-1]*E_x_hat_x_hat*(transpose(L[:, :, i-1])*transpose(B) .+ transpose(A) .- transpose(H)*transpose(K[:, :, i-1])) .+ K[:, :, i-1]*H*E_x_hat_x_hat*(transpose(H)*transpose(K[:, :, i-1]) .- transpose(A) .- transpose(L[:, :, i-1])*transpose(B)) .+ K[:, :, i-1]*H*E_x_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ (B*L[:, :, i-1] .- K[:, :, i-1]*H .+ A)*transpose(E_x_x_hat)*transpose(H)*transpose(K[:, :, i-1]) .+ K[:, :, i-1]*Ω_ω*transpose(K[:, :, i-1]) .+ Ω_η   
        
        E_x_x_hat_new = A*E_xx*transpose(H)*transpose(K[:, :, i-1]) .+ B*L[:, :, i-1]*E_x_hat_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ A*E_x_x_hat*(transpose(A) .+ transpose(L[:, :, i-1])*transpose(B) .- transpose(H)*transpose(K[:, :, i-1])) .+ B*L[:, :, i-1]*transpose(E_x_x_hat)*transpose(H)*transpose(K[:, :, i-1])

        cov_x_hat = E_x_hat_x_hat_new .- E_x_hat_new*transpose(E_x_hat_new)

        E_x = E_x_new
        E_x_hat = E_x_hat_new
        E_xx = E_xx_new
        E_x_hat_x_hat = E_x_hat_x_hat_new
        E_x_x_hat = E_x_x_hat_new

    end

    cov_x = E_xx .- E_x*transpose(E_x)
    cost += transpose(E_x)*Q_N*E_x .+ tr(Q_N*cov_x) 
    
    return cost

end



