% Chain game: each player mismatches the previous player and matches the
% next player. The first and last players face a fixed (dummy) payoff term
% instead of a missing neighbour.
tic
%A_prev = [-1 1; 1 -1];
%A_next = [1 -1; -1 1];

% Payoff matrices against the previous / next player in the chain.
A_prev = [-2 4; 1 -2];
A_next = [2 -4; -1 2];

% Constant payoff vectors used at the two ends of the chain.
A_frst = [-2; 1];
A_last = [-4; 2];

% ev(k,n): 1-by-n row vector with a single 1 in position k.
ev = @(k,n) [zeros(1,k-1) 1 zeros(1,n-k)];

N = 3;      % Number of non-dummy players.
n = 2;      % Number of actions.
T = 8000;   % Number of iterations.
%a = 1;
%as = ones(1,N)*a;
as = [1 2 4];           % Exploration rate of each player.
eta = 0.01;             % Common learning rate.
etas = ones(1,N)*eta;   % Learning rate of each player.

runs = 1;
last_it = zeros(runs,N);    % Final first-action probability per run and player.
last_ut = zeros(runs,N);    % Final expected utility per run and player.

for r = 1:runs
    u  = zeros(N,n,T);  % Utility of every action, per player and iteration.
    U  = zeros(N,T);    % Expected utility, per player and iteration.
    s  = zeros(N,n,T);  % Auxiliary quantity for all players, all actions.
    x0 = 0.5;
    % Mixed strategies of all players, N-by-2-by-(T+1).
    x  = cat(2, x0*ones(N,1,T+1), (1-x0)*ones(N,1,T+1));

    % Random initial strategy for every player (one rand draw per player).
    for p = 1:N
        x0 = rand;
        x(p,:,1) = [x0, 1-x0];
    end

    for t = 1:T
        % Utilities: build each player's payoff vector against its
        % neighbours at iteration t, then read it out action by action.
        for p = 1:N
            if p == 1
                payoff = A_frst+A_next*x(2,:,t)';
            elseif p == N
                payoff = A_last+A_prev*x(N-1,:,t)';
            else
                payoff = A_prev*x(p-1,:,t)'+A_next*x(p+1,:,t)';
            end
            for a = 1:n
                u(p,a,t) = ev(a,n)*payoff;
            end
        end

        % Q-learning gradient and update, applied to all actions at once.
        for p = 1:N
            xp     = x(p,:,t);
            U(p,t) = xp*u(p,:,t)';
            negent = xp*log(xp)';   % sum_i x_i*log(x_i)
            x(p,:,t+1) = xp+etas(p)*xp.*(u(p,:,t) - U(p,t) - as(p)*(log(xp)-negent));
        end
    end

    % Payoff terms attributed to the dummy players at both ends of the chain.
    U_df = zeros(T,1);
    U_dl = zeros(T,1);
    for t = 1:T
        U_df(t) = -x(1,:,t)*A_frst;
        U_dl(t) = -x(N,:,t)*A_last;
    end

    % Store as T-by-N for plotting and keep the final values of this run.
    U = U';
    xplot = squeeze(x(:,1,1:T))';
    last_it(r,:) = xplot(end,:);
    last_ut(r,:) = U(end,:);
end

last_it
last_ut

%figure_settings
%colororder([0, 0.4470, 0.7410; 0.6350, 0.0780, 0.1840])

% Figure 1: box plots over all runs of the final first-action probability
% (top) and the final expected utility (bottom) of every player, with the
% exploration rates overlaid on the right-hand axis.
expl_color  = [0.6350, 0.0780, 0.1840];   % Colour of exploration-rate markers.
title_color = [0.4940, 0.1840, 0.5560];   % Colour of title / x-label text.

% Right-axis range. It is [-0.1 1.1] whenever all rates lie in [0,1] and
% grows with the largest rate otherwise, so that no marker is clipped.
as_ylim = [-0.1, max(1, max(as)) + 0.1];

hfig = figure(1);
pos = get(hfig,'position');
% Move the window to the screen origin and stretch its height.
set(hfig,'position',pos.*[0 0 1 1.7]);
sgtitle(['Match-Mismatch NZSG: runs = ' num2str(runs) ', iter = ' num2str(T,'%1.0e')])%', exploration-rates = [' regexprep(num2str(as),'\s+',',') ']'])
% No title() call here: before the first subplot exists it would only create
% a full-figure axes that subplot(2,1,1) deletes again.

% --- Top panel: final Pr(H) per player --------------------------------
subplot(2,1,1);
yyaxis left
boxplot(last_it)
ylim([-0.1 1.1])
grid on
box on
% Interpreter is given explicitly so the label does not depend on the
% session-wide defaults having been set beforehand.
ylabel('$\Pr(H)$','Interpreter','latex')
%xlabel('Agents')
yyaxis right
scatter(1:N,as,20,expl_color,'filled');
ylim(as_ylim);
% NOTE(review): the title says 'No exploration' although the exploration
% rates plotted here come from `as`; confirm the intended wording.
title('No exploration','Color',title_color)
%ylabel('Exploration Rates')
legend('exploration rates', 'location', 'NorthEast', 'Fontsize', 10, 'TextColor', expl_color);
%exportgraphics(fig1,'figure_no_prob.png','BackgroundColor','white','Resolution','500')

% --- Bottom panel: final expected utility per player ------------------
subplot(2,1,2);
yyaxis left
boxplot(last_ut)
ylim([-0.2 2.1])
grid on
box on
%title(['Match-Mismatch NZSG: runs = ' num2str(runs) ', iterations = ' num2str(T,'%1.0e')])
ylabel('Rewards')
xlabel('Agents','Color',title_color)

yyaxis right
scatter(1:N,as,20,expl_color,'filled')
%legend('exploration rates', 'location', 'NorthEast', 'Fontsize', 10, 'TextColor', [0.6350, 0.0780, 0.1840]);
ylim(as_ylim)
%ylabel('Exploration Rates')


%exportgraphics(hfig,'figure_no.png','BackgroundColor','white','Resolution','1000')

% Report the time elapsed since the tic at the top of the script.
toc
% Figure 2: time series of the last run (choice probabilities, utilities,
% total utility including the dummy players) and a 3-D trajectory plot.
% A separate figure is used because subplot(2,2,.) on figure 1 would delete
% the overlapping 2x1 box-plot axes drawn there.

% Set the LaTeX interpreter defaults before any label is created, so that
% strings such as '$\Pr(H)$' are rendered as intended. These are root-level
% defaults and stay in effect for the rest of the MATLAB session.
set(groot,'defaultAxesTickLabelInterpreter','latex','defaulttextinterpreter','latex','defaultLegendInterpreter','latex');

figure(2);

% Probability of the first action for every player over the iterations.
subplot(2,2,1);
plot(xplot);
xlabel('Iterations');
ylabel('$\Pr(H)$');
title('Choice distributions');

% Expected utility of every player over the iterations.
subplot(2,2,2);
plot(U);
xlabel('Iterations');
title('Utilities');

% Append the dummy players' payoffs as first and last column.
U = [U_df U U_dl];

% Sum of all utilities per iteration, rounded to 10 decimals.
subplot(2,2,3);
plot(round(sum(U,2),10));

subplot(2,2,4);
%plot_network;

% One trajectory per player: (Pr(first action), Pr(second action), iteration).
% fun_color_line3 is a project helper not shown in this file; presumably it
% draws a 3-D line coloured by its fourth argument - verify against its source.
clines = cell(1,N);
for k = 1:N
    clines{k} = fun_color_line3(xplot(:,k),1-xplot(:,k),1:T,1:T,'LineStyle','-','LineWidth',2);
    hold on
end

view(135,45);
grid on
xlim([0 1])
ylim([0 1])
box on
% Reversed copper colormap without its first 39 (darkest) entries.
map = copper;
map = flipud(map(40:end,:));
colormap(map);
%title(['Match-MisMatch Network Game: $N=$ ' num2str(N)],'FontSize', 10)
xlabel('Heads')
ylabel('Tails')
zlabel('Iterations')

% Colorbar of the trajectory plot, shifted to the right and made narrow.
cbr = colorbar('FontSize',8,'TickLabelInterpreter','latex','location','eastoutside','AxisLocation','in');
cbr.Label.String      = 'Iterations';
cbr.Label.FontSize    = 10;
cbr.Label.Interpreter = 'latex';
cpos     = get(cbr,'Position');
set(cbr,'Position',[cpos(1)+0.15 cpos(2) 0.01 cpos(4)]);
%orig1    = get(gca, 'Position');
%orig1(1) = orig1(1)-0.03;
%set(gca,'Position',orig1*1);

% Display the final first-action probabilities of the last run (this
% replaces the runs-by-N matrix of the same name with a single row).
last_it = xplot(end,:)
%last_ut = U(end,:)