function get_data

clear
clear global
clc
close all

load('parameters.mat');

global L M u_max q_dot_max q_dot_sat u_sat ll u_real_max 
global  A_dis omega_dis phi_dis fric_param fric_mat   neural_net K_p K_v k_d_1 k_d_2 q_sat

K_p = 1;
K_v = 1;
k_d_1 = 10;
k_d_2 = 10;

neural_net = importONNXNetwork('NeuralNetworkPendulum.onnx','OutputLayerType','regression');

%these are used to test maximum obtained values in a first run in order to
%normalize later
ll = 1;

%these are conservative upper bounds of the maximum values
u_sat =  10;
q_dot_sat = 1.35;
   
for kk = 1:50
    
    %take from test instances (>100)
    kk_idx = kk + 200;
    M = mass_cell{kk_idx};
    L = length_cell{kk_idx};
    A_dis = A_dis_cell{kk_idx};
    omega_dis = omega_dis_cell{kk_idx};
    phi_dis = phi_dis_cell{kk_idx};  
    state_0 = state_0_cell{kk_idx};
    
    clear reward cost
    
    
    tfinal = 10;
    size_data_points = 5000;
    tspan = linspace(0,tfinal,size_data_points);

    atol_ode = 1e-6;
    rtol_ode = 1e-6;
    options = odeset('AbsTol', atol_ode, 'RelTol', rtol_ode, 'Events', @EventFunction);    
    [tout,qout] = ode23(@pendulum_as,tspan,[state_0;0.01;0.01],options);
    u = zeros(numel(tout),1);

    for ii = 1:numel(tout)
        [~, u(ii)] = pendulum_as(tout(ii),qout(ii,:));
    end
    reward = zeros(numel(tout),1);
    H = 200;
    for ii=1:numel(tout)
        reward(ii) = -cos(qout(ii,1)) -0.1*sin(qout(ii,1)) - 0.1*qout(ii,2) - 0.001*u(ii)^2;
    end

    state_0 = qout(end,:)' ;
    tNow = tout(end); 
    
    
    if tNow == tfinal
        for ii= H + 1:numel(tout)
            cost(ii-H) = 0;
            for jj=1:H
                cost(ii-H) = cost(ii-H) + reward(ii-H+jj);
            end
        end   
        subplot(2,1,1)
        hold on
        plot(tout,reward)
        subplot(2,1,2)
        hold on        
        plot(tout(1:numel(tout)-H),cost);
        
        tall_cell{kk} = tout;
        reward_cell{kk} = reward;
        cost_cell{kk} = cost;
        continue    
    end
    
    tspan = linspace(tNow,tfinal,size_data_points-numel(tout));    

    %here the error e_v has reached zero and there's a discontinuity.
    %the system evolves on the sliding surface e_v = 0
    
    disp('running ode 2...')
    options2 = odeset('AbsTol', atol_ode, 'RelTol', rtol_ode);
    [tout2,qout2] = ode45(@sliding,tspan,state_0);

    u2 = zeros(numel(tout2),1);
    for ii = 1:numel(tout2)
        [~, u2(ii)] = sliding(tout2(ii),qout2(ii,:));
    end
    for ii=1:numel(tout2)
        reward(ii+numel(tout)) = -cos(qout2(ii,1)) -0.1*sin(qout2(ii,1)) - 0.1*qout2(ii,2) - 0.001*u2(ii)^2;
    end
    for ii=H +1:size_data_points
        cost(ii-H) = 0;
        for jj=1:H
            cost(ii-H) = cost(ii-H) + reward(ii-H+jj);
        end
    end
    tall= [tout;tout2];
    subplot(2,1,1)
    hold on
    plot(tall,reward)
    subplot(2,1,2)
    hold on        
    plot(tall(1:4800),cost);
        
    tall_cell{kk} = tall;
    reward_cell{kk} = reward;
    cost_cell{kk} = cost;

end
save('pendulum_reward_cost.mat','tall_cell','reward_cell','cost_cell');


function [dydt u2] = sliding(t,state)
    
    global u_sat q_dot_sat 
    global A_dis omega_dis phi_dis fric_param fric_mat neural_net K_p K_v k_d_1 k_d_2
      
    if size(state,1)==1
        state = state';
    end
    
    q = state(1);
    q_dot = state(2);
    d_1_hat = state(3);
        
    error = 1 - cos(q-pi);   
    error_dot = sin(q-pi)*q_dot;

      
    sin_norm = (sin(q) + 1)/2;
    q_dot_norm = (q_dot + q_dot_sat)/(q_dot_sat + q_dot_sat);    
    t_norm = t/10;
    nn_input = [sin_norm;q_dot_norm];
    tau_NN =  predict(neural_net,nn_input);
    u_NN = tau_NN*(u_sat - (-u_sat)) -u_sat;
    
    vd = - K_p*error;
    ev = q_dot - vd;
    vd_dot = - K_p*error_dot;
    q_ddot = vd_dot;
           
    
    u_NN = u_NN';
    u = u_NN - K_v*ev - d_1_hat*ev;    
    u2 = u;  
    
    d_1_hat_dot = k_d_1*norm(ev)^2;   
    d_2_hat_dot = k_d_2*norm(ev);    
    
    dydt = [q_dot; q_ddot;d_1_hat_dot;d_2_hat_dot];


function [dydt u]= pendulum_as(t,state)
    t
    global L  u_sat q_dot_sat   
    global A_dis omega_dis phi_dis fric_param fric_mat neural_net K_p K_v k_d_1 k_d_2
    
    
    if size(state,1)==1
        state = state';
    end
    
    q = state(1);
    q_dot = state(2);
    d_1_hat = state(3);
    d_2_hat = state(4);
   
    dis = A_dis*sin(omega_dis*t+phi_dis);  
    friction = q_dot.*fric_mat*diag(fric_param)*q_dot;
       
    q_dot_norm = zeros(6,1);
    q_norm = zeros(6,1);
    
    error = 1 - cos(q-pi);
    g= 9.81;    
    
   
    %control policy
	sin_norm = (sin(q) + 1)/2;
    q_dot_norm = (q_dot + q_dot_sat)/(q_dot_sat + q_dot_sat);    
    t_norm = t/20;
    nn_input = [sin_norm;q_dot_norm];
    tau_NN =  predict(neural_net,nn_input);
    u_NN = tau_NN*(u_sat - (-u_sat)) -u_sat;
    
      
    vd = - K_p*error;
    ev = q_dot - vd;
    ev_hat = 0;
    if norm(ev) >=  10^(-8)
       ev_hat = ev/norm(ev);
    end    
    u_NN = u_NN';
    u = u_NN - K_v*ev - d_1_hat*ev - d_2_hat*ev_hat;
    
    norm(ev)
    dynamics = u - g/L*sin(q) + dis;   
    d_1_hat_dot = k_d_1*norm(ev)^2;   
    d_2_hat_dot = k_d_2*norm(ev);
    
    dydt = [q_dot;dynamics;d_1_hat_dot;d_2_hat_dot];
    

function [value,isterminal,direction] = EventFunction(t,state)
    
    global K_p 
    q = state(1);
    q_dot = state(2);
    d_1_hat = state(3);
    d_2_hat = state(4);
    
    error = 1 - cos(q-pi);
    vd = - K_p*error;
    ev = q_dot - vd; 
    value = norm(ev) - 10^(-6);
    isterminal = true;    
    direction = 0;    
  