close all;
clear all;   % also clears functions and persistent variables, not just workspace variables
clc;


% Regret comparison of index policies under drifting transition
% probabilities. Policies run further down in this script: WhittleOracle1
% (baseline), OurPolicy1, UCWhittle1, UCWhittleWindow, WIQL1, WIQLwindow1
% and random1.
M=1;                 % passed to every policy as M (presumably the per-round activation budget -- TODO confirm)
numClasses=2;        % number of user classes; named numClasses so the built-in class() is not shadowed
k=5;                 % users per class
N=numClasses*k;      % total number of users
T=50;                % total number of episodes
TotalState=10;       % length of R (presumably the number of states per user -- TODO confirm)
R=0:1:TotalState-1;  % row vector 0..TotalState-1 handed to every policy as R (presumably per-state rewards -- TODO confirm)

iteration=10;        % number of independent simulation runs
for j=1:iteration
    j   % no semicolon on purpose: echoes the run index as a progress indicator

    % Per-run constants (identical in every run).
    epsilon=0.35;   % each random-walk move below has size epsilon/2
    H=50;           % time horizon
    gamma=0.99;     % discount factor
    win=1;          % window argument, only used by UCWhittleWindow

    % Transition tables: one row per user, one column per episode.
    P0=zeros(N,T);   P0prev=zeros(N,T);   P0next=zeros(N,T);
    P1=zeros(N,T);   P1prev=zeros(N,T);   P1next=zeros(N,T);

    % Episode 1. Users with index n > N/2 differ from the rest only in the
    % P1 / P1next entries.
    upperHalf=((1:N).'>N/2);
    P0(:,1)=0.5;
    P0prev(:,1)=0.5;               % p_n(t)
    P0next(:,1)=0;
    P1(:,1)=0.5*upperHalf;         % 0.5 for n > N/2, 0 otherwise
    P1prev(:,1)=0;
    P1next(:,1)=1-0.5*upperHalf;   % q_n(t): 0.5 for n > N/2, 1 otherwise

    % Episodes 2..T. prev and pnext each take a +/- epsilon/2 step chosen by
    % a fair coin and are clamped to [0, 1]. The two rand draws per episode
    % happen in the order prev, then pnext.
    halfStep=epsilon/2;
    prev=0.5;
    pnext=0.5;
    for t=2:T
        if rand>=0.5
            prev=max(prev-halfStep, 0);
        else
            prev=min(prev+halfStep, 1);
        end
        if rand>=0.5
            pnext=max(pnext-halfStep, 0);
        else
            pnext=min(pnext+halfStep, 1);
        end

        % From episode 2 on every user gets the same values.
        P0prev(:,t)=prev;
        P0(:,t)=1-prev;
        P0next(:,t)=P0next(:,t-1);   % carried over from the previous episode
        P1next(:,t)=pnext;
        P1(:,t)=1-pnext;
        P1prev(:,t)=0;
    end

    % Leading arguments shared by every policy call.
    model={N, M, P0, P0next, P0prev, P1, P1next, P1prev, H, gamma};

    % Each call fills row j with T values. The call order is kept fixed.
    RWhittle(j,1:T)=WhittleOracle1(model{:}, R, T);
    Reward(j,1:T)=OurPolicy1(model{:}, epsilon, R, T);
    %Reward1(j,1:T)=OurPolicyWindow(model{:}, epsilon, win, R, T);   % disabled
    RewardUC(j,1:T)=UCWhittle1(model{:}, epsilon, R, T);
    RewardUCWindow(j,1:T)=UCWhittleWindow(model{:}, epsilon, win, R, T);
    RewardWIQL(j,1:T)=WIQL1(model{:}, R, T);
    RewardWIQL1(j,1:T)=WIQLwindow1(model{:}, R, T);
    Rewarduni(j,1:T)=random1(model{:}, R, T);
end
% Replace each policy's per-episode values with the shortfall relative to
% RWhittle, floored at zero. The Reward* matrices are overwritten in place
% and hold these clipped differences from here on.
oracle=RWhittle(1:iteration,1:T);
Reward(1:iteration,1:T)=max(oracle-Reward(1:iteration,1:T), 0);
%Reward1(1:iteration,1:T)=max(oracle-Reward1(1:iteration,1:T), 0);
RewardUC(1:iteration,1:T)=max(oracle-RewardUC(1:iteration,1:T), 0);
RewardUCWindow(1:iteration,1:T)=max(oracle-RewardUCWindow(1:iteration,1:T), 0);
RewardWIQL(1:iteration,1:T)=max(oracle-RewardWIQL(1:iteration,1:T), 0);
RewardWIQL1(1:iteration,1:T)=max(oracle-RewardWIQL1(1:iteration,1:T), 0);
Rewarduni(1:iteration,1:T)=max(oracle-Rewarduni(1:iteration,1:T), 0);

% Cumulative regret at a set of checkpoint episodes, averaged over runs.
% At this point the Reward* matrices hold per-episode clipped regret
% (one row per run, one column per episode).
%
% Checkpoints are episode 1 and every 5th episode up to T. They are derived
% from T rather than a literal 50 so that changing T cannot index past the
% end of the Reward* matrices.
episode=[1, 5:5:T];
numCheckpoints=numel(episode);

% Preallocate the mean curves and their confidence half-widths.
Regret=zeros(1, numCheckpoints);          Regretconfidence=zeros(1, numCheckpoints);
RegretUC=zeros(1, numCheckpoints);        RegretUCconfidence=zeros(1, numCheckpoints);
RegretUCWindow=zeros(1, numCheckpoints);  RegretUCWindowconfidence=zeros(1, numCheckpoints);
RegretWIQL=zeros(1, numCheckpoints);      RegretWIQLconfidence=zeros(1, numCheckpoints);
RegretWIQL1=zeros(1, numCheckpoints);     RegretWIQL1confidence=zeros(1, numCheckpoints);
Regretuni=zeros(1, numCheckpoints);       Regretuniconfidence=zeros(1, numCheckpoints);

for e=1:numCheckpoints
    lastEp=episode(e);   % accumulate episodes 1..lastEp

    % Per-run cumulative regret up to this checkpoint.
    Regretiter=zeros(1, iteration);
    %Regretiter1=zeros(1, iteration);
    RegretiterUC=zeros(1, iteration);
    RegretiterUCWindow=zeros(1, iteration);
    RegretiterWIQL=zeros(1, iteration);
    RegretiterWIQL1=zeros(1, iteration);
    Regretiteruni=zeros(1, iteration);
    for j=1:iteration
        Regretiter(j)=sum(Reward(j,1:lastEp));
        %Regretiter1(j)=sum(Reward1(j,1:lastEp));
        RegretiterUC(j)=sum(RewardUC(j,1:lastEp));
        RegretiterUCWindow(j)=sum(RewardUCWindow(j,1:lastEp));
        RegretiterWIQL(j)=sum(RewardWIQL(j,1:lastEp));
        RegretiterWIQL1(j)=sum(RewardWIQL1(j,1:lastEp));
        Regretiteruni(j)=sum(Rewarduni(j,1:lastEp));
    end

    % Mean over runs, and 1.96*std/sqrt(runs) as the normal-approximation
    % 95% confidence half-width.
    Regret(e)=mean(Regretiter);
    %Regret1(e)=mean(Regretiter1);
    Regretconfidence(e)=1.96*std(Regretiter)/sqrt(iteration);
    RegretUC(e)=mean(RegretiterUC);
    RegretUCconfidence(e)=1.96*std(RegretiterUC)/sqrt(iteration);
    RegretUCWindow(e)=mean(RegretiterUCWindow);
    RegretUCWindowconfidence(e)=1.96*std(RegretiterUCWindow)/sqrt(iteration);
    RegretWIQL(e)=mean(RegretiterWIQL);
    RegretWIQLconfidence(e)=1.96*std(RegretiterWIQL)/sqrt(iteration);
    RegretWIQL1(e)=mean(RegretiterWIQL1);
    RegretWIQL1confidence(e)=1.96*std(RegretiterWIQL1)/sqrt(iteration);
    Regretuni(e)=mean(Regretiteruni);
    Regretuniconfidence(e)=1.96*std(Regretiteruni)/sqrt(iteration);
end
% figure(1)
% semilogy(episode, Regretuni, 'b-.',episode, RegretUC,'r--',episode, RegretUCWindow,'r--',episode, RegretWIQL, 'mo-', episode, Regret,'k-');
% xlabel('Episode'), ylabel('Reg(T)')
% legend('Random', 'UCWhittle','WIQL', 'OurPolicy')
% Mean cumulative regret per checkpoint on a logarithmic y-axis.
% The legend entries follow the order of the plotted curves.
figure(1)
semilogy(episode, Regretuni,      'b-.', ...
         episode, RegretUC,       'rx-', ...
         episode, RegretWIQL,     'ms--', ...
         episode, RegretUCWindow, 'ro-', ...
         episode, Regret,         'k-');
xlabel('Episode')
ylabel('Reg(T)')
legend('Random', 'UCWhittle', 'WIQL', 'UCWhittleWindow', 'OurPolicy1')

