tic
% Render tick labels, text objects and legends with the LaTeX interpreter by default.
set(groot, ...
    'defaultAxesTickLabelInterpreter', 'latex', ...
    'defaulttextinterpreter',          'latex', ...
    'defaultLegendInterpreter',        'latex');

% A_prev = [-2 4; 1 -2];
% A_next = [2 -4; -1 2];
% A_frst = [-2; 1];
% A_last = [-4; 2];

N = 3;          % Number of non-dummy players.

policies = {'CLR-1', 'ETE'};
pol = {policies{1}, policies{2}};
% External script, not visible in this file. It runs in the shared workspace,
% so it may read the variables defined above -- NOTE(review): confirm what it
% reads/defines. Nothing it defines is relied upon below.
aux_exploration_rates;

% Payoff data of the line network (used in the utility computation of the
% simulation loop):
%   A_prev multiplies the mixed strategy of player k-1,
%   A_next multiplies the mixed strategy of player k+1,
%   A_frst / A_last are constant payoff vectors added for player 1 / player N.
A_prev  = [-1 1; 1 -1];
A_next  = [1 -1; -1 1];
A_frst  = [-1; 1];
A_last  = [-1; 1];
% ev(k,n): 1-by-n row vector with a single 1 in position k.
ev      = @(k,n) [zeros(1,k-1) 1 zeros(1,n-k)];

n       = 2;                   % Number of actions.
inner   = 50;                  % Number of iterations per outer step.
% Number of outer steps. Set exactly once: the earlier assignment
% outer = length(rates_x) was overwritten before any use and read rates_x
% before this file defines it.
outer   = 10000;
eta     = 0.001;               % Learning rate.
etas    = ones(1,N)*eta;       % One learning rate per player.
%Ts      = linspace(10,0,outer);
tplot   = 1:outer;

% Exploration rates: one schedule per player, each with exactly `outer` entries.
% linspace floors a non-integer point count, so the segment lengths are
% computed as integers that sum to `outer`; this keeps the schedules the right
% length even when `outer` is not a multiple of 4 (or of 2).
n_z_rise = round(3*outer/4);            % length of the rising part of rates_z
n_y_rise = round(outer/2);              % length of the rising part of rates_y
rates_z = [linspace(0,4,n_z_rise) linspace(4,0,outer-n_z_rise)];
rates_x = linspace(3,0,outer);
rates_y = [linspace(0,2,n_y_rise) linspace(2,0,outer-n_y_rise)];

% Per-outer-step records: first-action probability and average utility of
% every player.
plot_qre = zeros(outer,N);
plot_utl = zeros(outer,N);

% Mixed strategies of all players, size N-by-2-by-(inner+1); the third
% dimension indexes the inner iteration. Everything starts at (x0, 1-x0).
x0 = 0.5;
x  = [x0*ones(N,1,inner+1),(1-x0)*ones(N,1,inner+1)];

% Random initial strategy for each player: first-action probability drawn
% uniformly from [0.1, 0.9].
for i = 1:N
    x0 = 0.1+rand*0.8;
    x(i,:,1) = [x0, 1-x0];
end

d = 1;  % Exponent applied to the exploration rates.

% Outer loop: one step per entry of the exploration-rate schedules. Each step
% runs `inner` update iterations and warm-starts from the previous step's
% final strategies.
for j = 1:outer
    % Exploration rate of players 1, 2, 3 at this step.
    as = [rates_x(j)^d, rates_z(j)^d, rates_y(j)^d];
    u  = zeros(N,n,inner);  % Utilities of all players, all actions.
    U  = zeros(N,inner);    % Average utilities.
    for t = 1:inner
        % Per-action utilities, computed as whole payoff vectors.
        % Player 1: constant term plus interaction with player 2.
        u(1,:,t) = (A_frst + A_next*x(2,:,t)')';
        % Interior players: interaction with both neighbours.
        for k = 2:N-1
            u(k,:,t) = (A_prev*x(k-1,:,t)' + A_next*x(k+1,:,t)')';
        end
        % Player N: constant term plus interaction with player N-1.
        u(N,:,t) = (A_last + A_prev*x(N-1,:,t)')';

        % Q-learning gradient and update, all actions of a player at once.
        for k = 1:N
            xk = x(k,:,t);
            uk = u(k,:,t);
            U(k,t) = xk*uk';            % expected utility under xk
            entrpy = xk*log(xk)';       % sum_i x_i*log(x_i)
            x(k,:,t+1) = xk + etas(k)*xk.*(uk - U(k,t) - as(k)*(log(xk) - entrpy));
        end
    end

    % Record the end-of-step state, then restart the next step from it.
    plot_qre(j,:) = x(:,1,end)';
    plot_utl(j,:) = U(:,end)';
    x(:,:,1)      = x(:,:,end);
end

%% Plots

% Colour order used for the three per-player curves.
co = [0      0.4470 0.7410;
      0.85   0.3250 0.098;
      0.6350 0.0780 0.1840];

fig = figure(1);
set(fig,'defaultAxesColorOrder',co)
% Move the figure to the left screen edge, double its width, halve its height.
pos = get(fig,'position');
set(fig,'position',pos.*[0 1 2 0.5]);

% Panel 2: average utility of every player at the end of each outer step.
subplot(1,4,2);
h = plot(plot_utl,'LineWidth', 1.5);
%legend(h,'$U_1$', '$U_2$', '$U_3$', 'Location','best');
title('Utilities', 'FontSize', 16);
box on, grid on

% Panel 3: exploration-rate schedules, in the same player order as the simulation.
subplot(1,4,3);
rts = [rates_x.^d;rates_z.^d; rates_y.^d]';
hold on
tt = plot(rts,'LineWidth', 1.5);
title('Exploration Rates', 'FontSize', 16);
%legend(tt,{'$T_1$', '$T_2$', '$T_3$'}, 'Location','northeastoutside');
box on, grid on

% Panel 4: ratio of the utilities of players 1 and 3, with a dotted
% reference level at 1 drawn as 100 small markers.
subplot(1,4,4);
clr = [0.3010 0.7450 0.9330];
ss = plot(plot_utl(:,1)./plot_utl(:,3),'LineWidth', 1.5,'Color',clr);
hold on
scatter(linspace(0,outer,100), linspace(1,1,100),1,'filled');
%dd = plot(sum(plot_utl,2),'LineWidth', 1);
title('Ratio of utilities','Fontsize',16);
lgd = legend('$U_1/U_3$','Location','best');
lgd.Position(1) = 0.87;
lgd.Position(2) = 0.73;
ylim([0 4])
box on, grid on

% Panel 1: first-action probability of every player (drawn last so that the
% legend below attaches to these lines).
subplot(1,4,1);
pp = plot(plot_qre,'LineWidth', 1.5);
title('QRE strategies', 'FontSize', 16);
box on, grid on

% Widen the figure, then place the strategy legend manually.
fg = gcf;
fg.Position(3) = fg.Position(3) + 250;
Lgnd = legend(pp,{'$p_1$', '$p_2$', '$p_3$'}, 'Location','northwestoutside');
Lgnd.Position(1) = 0.67;
Lgnd.Position(2) = 0.6;

% Export the figure. The preferred target is a fixed, machine-specific folder;
% when it does not exist, save next to the current folder instead of erroring
% out after the whole simulation has already run.
outdir  = ['C:\Users\sleon\Dropbox\Post\SUTD.Postdoc\Q_learning' ...
    '\Q_learning_continued\Matlab.files_qlearning\Figures'];
outname = 'figure_linenet_1.png';
if isfolder(outdir)
    filename = [outdir '\' outname];
else
    warning('linenet:exportFallback', ...
        'Output folder "%s" not found; saving "%s" to the current folder instead.', ...
        outdir, outname);
    filename = fullfile(pwd, outname);
end
%exportgraphics(fig2,['figure_spoiled_child_step_10000.png'],'BackgroundColor','white','Resolution','500')
exportgraphics(fig,filename,'BackgroundColor','white','Resolution','500')