tic
% Render axis tick labels, text objects and legends with the LaTeX
% interpreter. These defaults are set on the graphics root, so they stay in
% effect for figures created after this script as well.
set(groot,'defaultAxesTickLabelInterpreter','latex','defaulttextinterpreter','latex','defaultLegendInterpreter','latex');

% Alternative payoff data (disabled):
% A_prev = [-2 4; 1 -2];
% A_next = [2 -4; -1 2];
% A_frst = [-2; 1];
% A_last = [-4; 2];

N = 3;          % Number of non-dummy players.

% Payoff data used by the utility computation in the simulation loop:
% A_prev / A_next multiply the mixed strategy of the previous / next player,
% A_frst / A_last are the constant terms added for the first / last player.
A_prev  = [-1 1; 1 -1];
A_next  = [1 -1; -1 1];
A_frst  = [-1; 1];
A_last  = [-1; 1];
ev      = @(k,n) [zeros(1,k-1) 1 zeros(1,n-k)];   % k-th unit row vector of length n.

n       = 2;                   % Number of actions.
inner   = 50;                  % Number of iterations.
eta     = 0.001;               % learning rates.
etas    = ones(1,N)*eta;       % One learning rate per player.
outer   = 10000;               % Number of exploration-rate steps.
%Ts      = linspace(10,0,outer);
tplot   = 1:outer;

% The schedules below split the horizon into quarters; a fractional point
% count would be truncated by linspace and produce rows of unequal length.
assert(mod(outer,4) == 0, 'outer must be divisible by 4 (got %d).', outer);

% Catalogue of candidate exploration-rate schedules, one per row
% (size 12-by-outer). The simulation draws rows from this catalogue.
% tent(peak,nUp): ramp 0 -> peak over nUp points, then peak -> 0 over the
% remaining outer-nUp points.
tent = @(peak,nUp) [linspace(0,peak,nUp) linspace(peak,0,outer-nUp)];

rates_all = [...
    tent(1,3*outer/4);      % peak after 3/4 of the horizon
    tent(2,3*outer/4);
    tent(3,3*outer/4);
    tent(1,1*outer/4);      % peak after 1/4 of the horizon
    tent(2,1*outer/4);
    tent(3,1*outer/4);
    tent(1,outer/2);        % peak at mid-horizon
    tent(2,outer/2);
    tent(3,outer/2);
    linspace(3,0,outer);    % monotonically decreasing schedules
    linspace(2,0,outer);
    linspace(1,0,outer);
    ];

% Monte-Carlo simulation: each run draws a random exploration schedule per
% player and random initial strategies, then iterates the Q-learning update
% for `inner` steps at each of the `outer` exploration-rate values.
% Recorded per run (outer-by-N each): probability of action 1 (plot_qre),
% average utility (plot_utl) and the exploration rate in force (plot_rts).
runs = 50;
plot_qre_all = zeros(outer,N,runs);
plot_utl_all = zeros(outer,N,runs);
plot_rts_all = zeros(outer,N,runs);

% Exactly three schedules (x, y, z) are drawn below, so fail early with a
% clear message instead of an index/size error deep inside the loops.
assert(N == 3, 'This simulation defines three exploration schedules; N must be 3 (got %d).', N);

n_sched = size(rates_all,1);    % Number of candidate schedules available.

for tt = 1:runs
    % Random schedule (row of rates_all) and random exponent in {1,2,3}.
    idx = randi(n_sched,1,3);
    d   = randi(3,1,3);
    rates_x = rates_all(idx(1),:).^d(1);
    rates_y = rates_all(idx(2),:).^d(2);
    rates_z = rates_all(idx(3),:).^d(3);

    plot_rts = zeros(outer,N);
    plot_qre = zeros(outer,N);
    plot_utl = zeros(outer,N);

    % Mixed strategies of all players: x(player, action, inner step).
    % The two-column layout matches n = 2 actions.
    x0 = 0.5;
    x  = [x0*ones(N,1,inner+1),(1-x0)*ones(N,1,inner+1)];
    for i = 1:N
        x0 = 0.1+rand*0.8;              % Initial P(action 1) in [0.1, 0.9].
        x(i,:,1) = [x0, 1-x0];
    end

    % Work arrays; every entry is rewritten on each pass of the j-loop, so
    % they are allocated once per run.
    u  = zeros(N,n,inner);  % Utilities of all players, all actions.
    U  = zeros(N,inner);    % Average utilities.

    for j = 1:outer
        % NOTE(review): player 2 uses rates_z and player 3 uses rates_y -
        % confirm this ordering is intended.
        as = [rates_x(j) rates_z(j) rates_y(j)];
        for t = 1:inner
            % Utilities: payoff of every action against the neighbours'
            % current mixed strategies.
            u(1,:,t) = (A_frst+A_next*x(2,:,t)')';
            for k = 2:N-1
                u(k,:,t) = (A_prev*x(k-1,:,t)'+A_next*x(k+1,:,t)')';
            end
            u(N,:,t) = (A_last+A_prev*x(N-1,:,t)')';

            % Q-learning gradient and update (all actions at once).
            for k = 1:N
                U(k,t) = x(k,:,t)*u(k,:,t)';
                entrpy = x(k,:,t)*log(x(k,:,t))';   % sum_i x_i*log(x_i)
                x(k,:,t+1) = x(k,:,t)+etas(k)*x(k,:,t).*(u(k,:,t) - U(k,t) - as(k)*(log(x(k,:,t))-entrpy));
            end
        end

        plot_qre(j,:) = x(:,1,end)';
        plot_utl(j,:) = U(:,end)';
        plot_rts(j,:) = as;
        x(:,:,1)      = x(:,:,end);     % Warm-start the next rate step.
    end

    plot_qre_all(:,:,tt) = plot_qre;
    plot_utl_all(:,:,tt) = plot_utl;
    plot_rts_all(:,:,tt) = plot_rts;
end
toc
%% Plots
% Draws four panels from the per-run results (strategies, utilities,
% exploration rates, utility ratio U_1/U_3) using the external helper
% fun_plot_distribution, then exports the figure as a PNG.
tic
% Colours: rows 1-3 are used for players 1-3, row 4 for the utility ratio.
co = [0 0.4470 0.7410;
     0.85 0.3250 0.098;
%     0.1290 0.3940 0.3250;
%   0.4940 0.1840 0.5560;
%    0.4660 0.6740 0.1880;
%     0 0.5 0;
     0.6350 0.0780 0.1840;
     0.3010 0.7450 0.9330;
];

fig = figure(1);
%set(fig,'defaultAxesColorOrder',co)
pos = get(fig,'position');
set(fig,'position',pos.*[0 1 2 0.45]);   % Move to the left edge, widen, flatten.

% Panel 2: average utilities of each player.
subplot(1,4,2)
hold all
for i = 1:N
    color_value = co(i,:);
    fun_plot_distribution(1:outer,squeeze(plot_utl_all(:,i,:))',color_value);
end
grid on, box on
ylim([-0.5 inf]);

% Panel 4: ratio of the utilities of players 1 and 3, with a dotted
% reference level at 1.
subplot(1,4,4)
ratio = squeeze(plot_utl_all(:,1,:))'./squeeze(plot_utl_all(:,3,:))';
color_value = co(4,:);
ss = fun_plot_distribution(1:outer,ratio,color_value);
hold all
scatter(linspace(0,outer,100), linspace(1,1,100),1,'filled');
grid on, box on
ylim([0 2]);
lgd = legend(ss,{'$U_1/U_3$'},'Location','best');
lgd.Position(1) = 0.87;
lgd.Position(2) = 0.73;

% Panel 3: exploration rates of each player.
subplot(1,4,3)
hold all
for i = 1:N
    color_value = co(i,:);
    fun_plot_distribution(1:outer,squeeze(plot_rts_all(:,i,:))',color_value);
end
ylim([0 inf]);
grid on, box on

% subplot(1,4,2)
% inBetween = [qre_down, qre_up];
% fill(xx, inBetween, 'g');
% hold on;
% plot(xx, mean_qre,'LineWidth', 2);

% Panel 1: probability of action 1 for each player.
subplot(1,4,1)
hold all
pp = [];
for i = 1:N
    color_value = co(i,:);
    pp(i) = fun_plot_distribution(1:outer,squeeze(plot_qre_all(:,i,:))',color_value);
end
grid on, box on
ylim([0 1]);

fg = gcf;
fg.Position(3) = fg.Position(3) + 250;   % Extra width to make room for the legend.
% add legend
Lgnd = legend(pp,{'$p_1$', '$p_2$', '$p_3$'}, 'Location','northwestoutside');
Lgnd.Position(1) = 0.67;
Lgnd.Position(2) = 0.6;

% Export. The preferred output folder is machine-specific; if it does not
% exist, save next to the current folder instead of failing after a long run.
fig_name = 'figure_linenet_averages.png';
fig_dir  = ['C:\Users\sleon\Dropbox\Post\SUTD.Postdoc\Q_learning' ...
    '\Q_learning_continued\Matlab.files_qlearning\Figures'];
if isfolder(fig_dir)
    filename = [fig_dir '\' fig_name];
else
    warning('linenet:missingFigureDir', ...
        'Output folder "%s" not found; saving "%s" to the current folder instead.', ...
        fig_dir, fig_name);
    filename = fullfile(pwd, fig_name);
end
% Resolution is passed as a number (DPI), as documented for exportgraphics.
exportgraphics(fig,filename,'BackgroundColor','white','Resolution',500)
toc