function qre = compute_qre(as,N,n,etas,iter,x)
%COMPUTE_QRE Iterate a Q-learning style update for N players on a chain.
%
%   qre = COMPUTE_QRE(as,N,n,etas,iter,x) runs `iter` update steps starting
%   from the strategies stored in x(:,:,1) and returns the weight each
%   player puts on action 1.
%
%   Player k's utility depends only on its neighbours k-1 and k+1 through
%   the fixed 2-by-2 matrices A_prev and A_next; the first and last player
%   see a constant boundary term (A_frst / A_last) in place of the missing
%   neighbour.
%
%   Inputs
%     as    - per-player coefficient multiplying the log/entropy term;
%             only as(1:N) is read.
%     N     - number of players (integer >= 2).
%     n     - number of actions; must be 2 because the payoff matrices
%             below are hard-coded 2-by-2.
%     etas  - per-player step size; only etas(1:N) is read.
%     iter  - number of update steps (integer >= 1).
%     x     - array with at least N rows and exactly n columns; x(k,i,1) is
%             the initial weight of player k on action i. Pages 2..iter+1
%             are overwritten / created by the updates.
%             NOTE(review): rows are presumably probability vectors
%             (non-negative, summing to 1) - confirm against callers.
%
%   Output
%     qre   - row vector, x(:,1,iter)': the action-1 weights on page `iter`,
%             i.e. after iter-1 updates. The page produced by the final
%             update (iter+1) is computed but not returned; this matches
%             the historical behaviour of this function.

% Alternative payoff matrices, kept for reference:
% A_prev = [-2 4; 1 -2];
% A_next = [2 -4; -1 2];
% A_frst = [-2; 1];
% A_last = [-4; 2];

A_prev = [-1 1; 1 -1];
A_next = [1 -1; -1 1];
A_frst = [-1; 1];
A_last = [-1; 1];

% ---- argument checks (all of these inputs previously failed with
% ---- unrelated indexing / dimension errors) --------------------------
if ~(isscalar(N) && N == floor(N) && N >= 2)
    error('compute_qre:badN', 'N must be an integer >= 2.');
end
if ~(isscalar(n) && n == size(A_prev,1))
    error('compute_qre:badn', ...
          'n must be %d to match the hard-coded payoff matrices.', size(A_prev,1));
end
if ~(isscalar(iter) && iter == floor(iter) && iter >= 1)
    error('compute_qre:badIter', 'iter must be an integer >= 1.');
end
if numel(as) < N || numel(etas) < N
    error('compute_qre:badParams', 'as and etas need at least N entries.');
end
if size(x,1) < N || size(x,2) ~= n || size(x,3) < 1
    error('compute_qre:badX', ...
          'x must have at least N rows, exactly n columns and an initial page.');
end

% Allocate every page up front instead of growing x once per iteration.
% Growth by indexed assignment zero-fills, so the result is unchanged.
if size(x,3) < iter+1
    x(:,:,iter+1) = 0;
end

u = zeros(N,n);  % utilities of all players / all actions, current step only

for t = 1:iter
    % Utilities: one payoff vector per player, computed once and stored as
    % a row (the boundary players use the constant A_frst / A_last term).
    u(1,:) = (A_frst + A_next*x(2,:,t)').';
    for k = 2:N-1
        u(k,:) = (A_prev*x(k-1,:,t)' + A_next*x(k+1,:,t)').';
    end
    u(N,:) = (A_last + A_prev*x(N-1,:,t)').';

    % Q-learning gradient and update. Every player reads page t only, so
    % the update is simultaneous.
    for k = 1:N
        p    = x(k,:,t);
        logp = log(p);
        % 0*log(0) is NaN in floating point; use its limit 0 so a pure
        % strategy does not poison the whole trajectory with NaNs.
        logp(p == 0) = 0;

        Uk     = p*u(k,:)';   % expected utility of player k
        entrpy = p*logp';     % sum_i p_i*log(p_i)

        x(k,:,t+1) = p + etas(k)*p.*(u(k,:) - Uk - as(k)*(logp - entrpy));
    end
end

% Action-1 weight of every row of x on page `iter`, as a row vector.
qre = x(:,1,iter)';