#Functions needed for the 1D case for Todorov's and Numerical GD algorithms.

#The function implements Todorov's algorithm to derive the optimal filter and control gains iteratively for the 1D case, see (Todorov, 2005).
function K_L_Todorov_optimization(iterations, x_z_0_mean, Σ_x_0, Σ_z_0, k_0, l_0, a, b, H, r, q, q_T, σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_η_internal_noise)

    #we compute the optimal control law and then the optimal estimator, until they're both optimized
    
    #initial conditions for the control and estimator vectors
    k_opt = k_0 .* ones(T-1)
    l_opt = l_0 .* ones(T-1)

    #initial conditions for the optimal estimator
    if (Σ_x_0 == 0) && (Σ_z_0 == 0)    
       
        Σ_e_start = 0
        Σ_z_start = x_z_0_mean^2
        Σ_ze_start = 0

    elseif (Σ_x_0 == 0)              #known initial state, x_z_0_mean. The initial state estimate is drawn from a guassian distribution with mean x_z_0_mean and variance Σ_z_0.

        Σ_e_start = Σ_z_0
        Σ_z_start = Σ_z_0 + x_z_0_mean^2
        Σ_ze_start = -Σ_z_0
    
    elseif (Σ_z_0 == 0)         #known initial state estimate, x_z_0_mean. The initial state is drawn from a guassian distribution with mean x_z_0_mean and variance Σ_x_0.
                                    # this is the situation considered in (Todorov, 2005)
        Σ_e_start = Σ_x_0
        Σ_z_start = x_z_0_mean^2
        Σ_ze_start = 0

    end                         

    #OPTIMIZATION

    for j in 1:iterations

        #the optimal control law is computed recursively backward in time

        l_opt[end] = (1/(r+b^2*q_T+σ_ϵ_control_dep_noise^2*q_T))*b*q_T*a 
        s_x = q+a*q_T*(a-b*l_opt[end])
        s_e = a*q_T*b*l_opt[end]
        
        for i in 2:T-1
            l_opt[T-i] = (1/(r+b^2*s_x+σ_ϵ_control_dep_noise^2*(s_x+s_e)))*b*s_x*a
            s_x_old = s_x 
            s_e_old = s_e 
            s_x = q+a*s_x_old*(a-b*l_opt[T-i]) + σ_ρ_mult_sensory_noise^2*k_opt[T-i]^2*s_e
            s_e = a*s_x_old*b*l_opt[T-i]+(a-k_opt[T-i]*H)^2*s_e_old
        end

        #the optimal estimator is computed in a forward pass through time                                
        
        Σ_e = Σ_e_start
        Σ_z = Σ_z_start 
        Σ_ze = Σ_ze_start

        for i in 1:T-1
            k_opt[i] = a*Σ_e*H*1/(H^2*Σ_e + σ_ω_add_sensory_noise^2 + σ_ρ_mult_sensory_noise^2*(Σ_e + Σ_z + 2*Σ_ze))  
            Σ_e_old = Σ_e
            Σ_ze_old = Σ_ze
            Σ_e = σ_ξ^2 + σ_η_internal_noise^2 + (a-k_opt[i]*H)*Σ_e_old*a + σ_ϵ_control_dep_noise^2*l_opt[i]^2*Σ_z
            Σ_z = σ_η_internal_noise^2 + k_opt[i]*H*Σ_e_old*a + (a-b*l_opt[i])^2*Σ_z + 2*(a-b*l_opt[i])*Σ_ze*H*k_opt[i]
            Σ_ze = (a-b*l_opt[i])*Σ_ze_old*(a-k_opt[i]*H) - σ_η_internal_noise^2
        end

    end

    #Here there should be a last optimization for the optimal controller, in order to compute the expected accumulated cost at the end of the 
    # whole optimization process. However, this step could be non relevant, given that probably we already reached the convergence point of 
    # the algorithm, if we used a large enough number of iterations. Heere, differenly from the previous function we make this further step

    #(the optimal control law is computed recursively backward in time)

    l_opt[end] = (1/(r+b^2*q_T+σ_ϵ_control_dep_noise^2*q_T))*b*q_T*a 
    s_x = q+a*q_T*(a-b*l_opt[end])
    s_e = a*q_T*b*l_opt[end]
    
    for i in 2:T-1
        l_opt[T-i] = (1/(r+b^2*s_x+σ_ϵ_control_dep_noise^2*(s_x+s_e)))*b*s_x*a
        s_x_old = s_x 
        s_e_old = s_e 
        s_x = q+a*s_x_old*(a-b*l_opt[T-i]) + σ_ρ_mult_sensory_noise^2*k_opt[T-i]^2*s_e
        s_e = a*s_x_old*b*l_opt[T-i]+(a-k_opt[T-i]*H)^2*s_e_old

    end

    return k_opt, -l_opt
       
end

#The function computes the optimal control law at fixed filter gains in the 1D case, see (Todorov, 2005).
function L_optimal_Todorov_CONTROL(l_0, k_opt, a, b, H, r, q, q_T, σ_ϵ_control_dep_noise, σ_ρ_mult_sensory_noise, T)
    
    #initial conditions for the control vector
    l_opt = l_0 .* ones(T-1)

    #OPTIMIZATION
    #the optimal control law is computed recursively backward in time

    l_opt[end] = (1/(r+b^2*q_T+σ_ϵ_control_dep_noise^2*q_T))*b*q_T*a 
    s_x = q+a*q_T*(a-b*l_opt[end])
    s_e = a*q_T*b*l_opt[end]
    
    for i in 2:T-1
        l_opt[T-i] = (1/(r+b^2*s_x+σ_ϵ_control_dep_noise^2*(s_x+s_e)))*b*s_x*a
        s_x_old = s_x 
        s_e_old = s_e 
        s_x = q+a*s_x_old*(a-b*l_opt[T-i]) + σ_ρ_mult_sensory_noise^2*k_opt[T-i]^2*s_e
        s_e = a*s_x_old*b*l_opt[T-i]+(a-k_opt[T-i]*H)^2*s_e_old
    end

    return -l_opt
       
end

#The function computes the optimal filter gains at fixed control law in the 1D case, see (Todorov, 2005).
function K_optimal_Todorov_ESTIMATOR(x_z_0_mean, Σ_x_0, Σ_z_0, l_opt, a, b, H, σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_η_internal_noise, T)
    
    #initial conditions for the "estimator" vector
    k_opt = ones(T-1)

    #initial conditions for the optimal estimator
    if (Σ_x_0 == 0) && (Σ_z_0 == 0)    
       
        Σ_e_start = 0
        Σ_z_start = x_z_0_mean^2
        Σ_ze_start = 0

    elseif (Σ_x_0 == 0)              #known initial state, x_z_0_mean. The initial state estimate is drawn from a guassian distribution with mean x_z_0_mean and variance Σ_z_0.

        Σ_e_start = Σ_z_0
        Σ_z_start = Σ_z_0 + x_z_0_mean^2
        Σ_ze_start = -Σ_z_0
    
    elseif (Σ_z_0 == 0)         #known initial state estimate, x_z_0_mean. The initial state is drawn from a guassian distribution with mean x_z_0_mean and variance Σ_x_0.
                                    # this is the situation considered in (Todorov, 2005)
        Σ_e_start = Σ_x_0
        Σ_z_start = x_z_0_mean^2
        Σ_ze_start = 0

    end                         

    #OPTIMIZATION
    #the optimal estimator is computed in a forward pass through time                                
    
    Σ_e = Σ_e_start
    Σ_z = Σ_z_start 
    Σ_ze = Σ_ze_start

    for i in 1:T-1
        k_opt[i] = a*Σ_e*H*1/(H^2*Σ_e + σ_ω_add_sensory_noise^2 + σ_ρ_mult_sensory_noise^2*(Σ_e + Σ_z + 2*Σ_ze))  
        Σ_e_old = Σ_e
        Σ_ze_old = Σ_ze
        Σ_e = σ_ξ^2 + σ_η_internal_noise^2 + (a-k_opt[i]*H)*Σ_e_old*a + σ_ϵ_control_dep_noise^2*l_opt[i]^2*Σ_z
        Σ_z = σ_η_internal_noise^2 + k_opt[i]*H*Σ_e_old*a + (a-b*l_opt[i])^2*Σ_z + 2*(a-b*l_opt[i])*Σ_ze*H*k_opt[i]
        Σ_ze = (a-b*l_opt[i])*Σ_ze_old*(a-k_opt[i]*H) - σ_η_internal_noise^2
    end

    return k_opt
       
end


#The function computes the accumulated cost by simulating the dynamics and taking the average over several realizations.
function cost_MC_sampling(realizations, seed_MC, Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T, q, q_T, r, a, b, m, n, H, l_vec, k_vec, σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_η_internal_noise)

    cost_each_realization = zeros(realizations)
    
    #here we can choose the seed, to have reproducible results, if needed. By using time_ns() we change the seed every time we run the simulation, to be sure our 
    # results are not due to the particular sample we use. Note that randn() uses the MT algorithm to generate random numbers.
    #Random.seed!(time_ns()) 
    
    Random.seed!(seed_MC)
    
    #Threads.@threads 
    Threads.@threads for j in 1:realizations
       
        #initial conditions
        x_0 = sqrt(Σ_x_0)*randn()+ μ_x_0
        z_0 = sqrt(Σ_z_0)*randn()+ μ_z_0
        
        #dynamics variables
        state_vec = zeros(2,1)
        state_vec[:] = [x_0, z_0]   #state vector                                                                   
        old_state_vec = zeros(2,1)
        old_state_vec .= state_vec
        #cost_each_realization[j] = q*x_0^2        #if we want to include the initial cost of being initially far from the target (like in Todorov)

        for i in 2:T-1
            
            #The noise column contains the 5 noise terms of the dynamics (for one time step) in this order: ξ,ϵ,ω,ρ,η. They are all gaussian random numbers with 0 mean 
            # and unitary std. In the dynamics we multiply them with the right factors to have the right std per each noise source.
            noise_column = randn(5)

            observation_temp = H*old_state_vec[1] + σ_ω_add_sensory_noise*noise_column[3] + σ_ρ_mult_sensory_noise*noise_column[4]*old_state_vec[1]
            state_vec[1] = a*old_state_vec[1] + (b*l_vec[i-1] + σ_ϵ_control_dep_noise*noise_column[2]*l_vec[i-1])*old_state_vec[2] + σ_ξ*noise_column[1] 
            state_vec[2] = (m+n*l_vec[i-1])*old_state_vec[2] + k_vec[i-1]*(observation_temp - H*old_state_vec[2]) + σ_η_internal_noise*noise_column[5]
            
            state_temp_for_cost = [(state_vec[1])^2, old_state_vec[2]^2]
            cost_params_temp = [q,r*l_vec[i-1]^2]
            cost_each_realization[j] += dot(state_temp_for_cost,cost_params_temp) 
            
            old_state_vec .= state_vec

        end

        #last update to compute the last cost
        noise_column = randn(5)
            
        observation_temp = H*old_state_vec[1] + σ_ω_add_sensory_noise*noise_column[3] + σ_ρ_mult_sensory_noise*noise_column[4]*old_state_vec[1]
        state_vec[1] = a*old_state_vec[1] + (b*l_vec[T-1] + σ_ϵ_control_dep_noise*noise_column[2]*l_vec[T-1])*old_state_vec[2] + σ_ξ*noise_column[1] 
        state_vec[2] = (m+n*l_vec[T-1])*old_state_vec[2] + k_vec[T-1]*(observation_temp - H*old_state_vec[2]) + σ_η_internal_noise*noise_column[5]
            
        state_temp_for_cost = [(state_vec[1])^2, old_state_vec[2]^2]
        cost_params_temp = [q_T,r*l_vec[T-1]^2]
        cost_each_realization[j] += dot(state_temp_for_cost,cost_params_temp)

    end

    average_cost = mean(cost_each_realization)
    #average_cost = mean(filter(y -> !isnan(y) && !isinf(y) && y < 20, cost_each_realization))  #skipping nans and Inf to compute the mean
    
    std_cost = std(cost_each_realization)
    #std_cost = std(filter(y -> !isnan(y) && !isinf(y) && y < 20, cost_each_realization))
    #constrained_data = count(y -> !isnan(y) && !isinf(y) && y < 20, cost_each_realization)

    return average_cost, std_cost/sqrt(realizations), cost_each_realization

end

#Function to compute the expected cost by propagating the needed moments of state and state estimate. The system supports multiplicative noise, both control and signal dependent,
# and sensory noise, both multiplicative and additive. Also internal noise in the computation of the state estimate is considered.
# Here we use a 1D model, for a preliminary study. The Kalman filter used for the state estimate is assumed to be non-adaptive,
# i.e. it can't depend on state estimate, but just on the time step. Note that now the vector w is defined in this way:
# w={E[x^2],E[z^2],E[xz]}. 
#Use this function as the objective function for the Numerical GD optimization, where x is the variable to be optimized. Note that k_vec = x[1:T-1], l_vec = x[T:2*(T-1)].
#NOTE that these variables: "σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_η_internal_noise" are all STANDARD DEVIATIONS
#while, these other variables: "Σ_x_0, Σ_z_0" are VARIANCES 
function expected_cost_moments_propagation(Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T, q, q_T, r, a, b, m, n, H, x, σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_η_internal_noise)

    k_vec = x[1:T-1]
    l_vec = x[T:2*(T-1)]

    #define the covariance vector, the updating matrix and the additive vector G for the discrete evolution of the system
    T_matrix = zeros(3,3)
    w =zeros(3,1)
    G = zeros(3,1)

    w = [μ_x_0^2 + Σ_x_0, μ_z_0^2 + Σ_z_0, μ_x_0*μ_z_0] #assuming that at the initial time step, the state and its estimate are independent
    
    T_matrix[1,:] = [a^2, (b^2*l_vec[1]^2+σ_ϵ_control_dep_noise^2*l_vec[1]^2), 2*a*b*l_vec[1]]
    T_matrix[2,:] = [k_vec[1]^2*(H^2+σ_ρ_mult_sensory_noise^2), (m+n*l_vec[1]-k_vec[1]*H)^2, 2*(m+n*l_vec[1]-k_vec[1]*H)*k_vec[1]*H]
    T_matrix[3,:] = [a*k_vec[1]*H, b*l_vec[1]*(m+n*l_vec[1]-k_vec[1]*H), a*(m+n*l_vec[1]-k_vec[1]*H)+b*l_vec[1]*k_vec[1]*H]
    
    G = [σ_ξ^2, k_vec[1]^2*σ_ω_add_sensory_noise^2+σ_η_internal_noise^2, 0]

    Ω = [w[1],w[2]]
    Γ = [q,r*l_vec[1]^2]

    cost = 0
    cost += q*(Σ_x_0 + μ_x_0^2)        #if we want to include the intial cost of being far from the target (like in Todorov)

    for i in 2:T-1
        
        w = T_matrix*w .+ G    #dynamics
        
        #update the vector G for the time evolution
        G[2] = k_vec[i]^2*σ_ω_add_sensory_noise^2+σ_η_internal_noise^2 
        
        #update the matrix T for the time evolution
        T_matrix[1,:] = [a^2, (b^2*l_vec[i]^2+σ_ϵ_control_dep_noise^2*l_vec[i]^2), 2*a*b*l_vec[i]]
        T_matrix[2,:] = [k_vec[i]^2*(H^2+σ_ρ_mult_sensory_noise^2), (m+n*l_vec[i]-k_vec[i]*H)^2, 2*(m+n*l_vec[i]-k_vec[i]*H)*k_vec[i]*H]
        T_matrix[3,:] = [a*k_vec[i]*H, b*l_vec[i]*(m+n*l_vec[i]-k_vec[i]*H), a*(m+n*l_vec[i]-k_vec[i]*H)+b*l_vec[i]*k_vec[i]*H]
        
        Ω[1] = w[1]
        cost += dot(Ω,Γ)        #cost
        Ω[2] = w[2]             #the cost of control is delayed of one time step, in order not to have the cost of the last control, which doesn't make sense conceptually
        Γ[2] = r*l_vec[i]^2 
        
    end

    w = T_matrix*w .+ G         #last evolution
    Ω[1] = w[1]            
    Γ[1] = q_T  #cost at the final time step has a different q, q=q_T !
    cost += dot(Ω,Γ)            #cost
    
    return cost

end

#The function computes the expected accumulated cost, by propagating the needed moments of state and state estimate. Same function as "expected_cost_moments_propagation", with the 
#only difference that k and l can be given as separate arguments, without the need to join them in a single vector, as done in "expected_cost_moments_propagation". Use this function 
#to simply compute the expected cost, and not in the Numerical GD optimization.
#NOTE that these variables: "σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_η_internal_noise" are all STANDARD DEVIATIONS
#while, these other variables: "Σ_x_0, Σ_z_0" are VARIANCES.
function expected_cost_moments_propagation_k_l_separated(Σ_x_0, Σ_z_0, μ_x_0, μ_z_0, T, q, q_T, r, a, b, m, n, H, k_vec, l_vec, σ_ξ, σ_ϵ_control_dep_noise, σ_ω_add_sensory_noise, σ_ρ_mult_sensory_noise, σ_η_internal_noise)

    #define the covariance vector, the updating matrix and the additive vector G for the discrete evolution of the system
    T_matrix = zeros(3,3)
    w = zeros(3,1)
    G = zeros(3,1)

    w = [μ_x_0^2 + Σ_x_0, μ_z_0^2 + Σ_z_0, μ_x_0*μ_z_0] #assuming that at the initial time step, the state and its estimate are independent
    
    T_matrix[1,:] = [a^2, (b^2*l_vec[1]^2+σ_ϵ_control_dep_noise^2*l_vec[1]^2), 2*a*b*l_vec[1]]
    T_matrix[2,:] = [k_vec[1]^2*(H^2+σ_ρ_mult_sensory_noise^2), (m+n*l_vec[1]-k_vec[1]*H)^2, 2*(m+n*l_vec[1]-k_vec[1]*H)*k_vec[1]*H]
    T_matrix[3,:] = [a*k_vec[1]*H, b*l_vec[1]*(m+n*l_vec[1]-k_vec[1]*H), a*(m+n*l_vec[1]-k_vec[1]*H)+b*l_vec[1]*k_vec[1]*H]
    
    G = [σ_ξ^2, k_vec[1]^2*σ_ω_add_sensory_noise^2+σ_η_internal_noise^2, 0]

    Ω = [w[1],w[2]]
    Γ = [q,r*l_vec[1]^2]

    cost = 0
    #cost += q*(Σ_x_0 + μ_x_0^2)        #if we want to include the intial cost of being far from the target (like in Todorov)

    for i in 2:T-1
        
        w = T_matrix*w .+ G    #dynamics
        
        #update the vector G for the time evolution
        G[2] = k_vec[i]^2*σ_ω_add_sensory_noise^2+σ_η_internal_noise^2 
        
        #update the matrix T for the time evolution
        T_matrix[1,:] = [a^2, (b^2*l_vec[i]^2+σ_ϵ_control_dep_noise^2*l_vec[i]^2), 2*a*b*l_vec[i]]
        T_matrix[2,:] = [k_vec[i]^2*(H^2+σ_ρ_mult_sensory_noise^2), (m+n*l_vec[i]-k_vec[i]*H)^2, 2*(m+n*l_vec[i]-k_vec[i]*H)*k_vec[i]*H]
        T_matrix[3,:] = [a*k_vec[i]*H, b*l_vec[i]*(m+n*l_vec[i]-k_vec[i]*H), a*(m+n*l_vec[i]-k_vec[i]*H)+b*l_vec[i]*k_vec[i]*H]
        
        Ω[1] = w[1]
        cost += dot(Ω,Γ)        #cost
        Ω[2] = w[2]             #the cost of control is delayed of one time step, in order not to have the cost of the last control, which doesn't make sense conceptually
        Γ[2] = r*l_vec[i]^2 
        
    end

    w = T_matrix*w .+ G         #last evolution
    Ω[1] = w[1]            
    Γ[1] = q_T  #cost at the final time step has a different q, q=q_T !
    cost += dot(Ω,Γ)            #cost
    
    return cost

end
