% Start from a clean session: no open figures, empty workspace, clear console.
close all;
clear all;
clc;

M = 1;      % number of channels; passed as the second argument to each policy
user = 6;   % number of users (one row of Channel per user)

% Each .mat file is expected to contain a variable named Combined_data.
% Columns 1 and 2 of each data set are used below, one column per user.
a = getfield(load('channelCentCamp.mat'), 'Combined_data');
b = getfield(load('channelMainCamp.mat'), 'Combined_data');
c = getfield(load('channelVetCamp.mat'), 'Combined_data');

w = [0.4, 0.9, 0.4, 0.5, 0.9, 0.4]; % per-user function parameter handed to the policies

%% channel statistics
MaxH = 2200;          % number of samples taken from every trace
SigThreshold = -70;   % not referenced in this script

% One raw trace per user: columns 1 and 2 of each loaded data set.
rawTraces = {a(1:MaxH, 1), a(1:MaxH, 2), ...
             b(1:MaxH, 1), b(1:MaxH, 2), ...
             c(1:MaxH, 1), c(1:MaxH, 2)};

% Per-user reference level: the mapped value peaks at 1 when the raw
% sample equals -refOffset(k).
refOffset = [100, 85, 115, 70, 110, 70];

Channel = zeros(6, MaxH);
for k = 1:6
    % Work on a double row vector, as the rows of Channel are.
    trace = double(reshape(rawTraces{k}, 1, []));

    % Replace missing samples with the mean of the available ones.
    trace = fillmissing(trace, 'constant', mean(trace, 'all', 'omitnan'));

    % Map into (0, 1]: 1 at the reference level, decaying with distance.
    Channel(k, :) = 1 ./ (1 + 0.2 * abs(trace + refOffset(k)));

    % Alternative kept for reference (binary indicator instead of the
    % smooth map): Channel(k, :) = double(trace > -refOffset(k));
end

% Optional post-processing kept for reference:
%   Channel = repmat(Channel, 1, 10);
%   plot(Channel(6,:)); ylim([-1, 2]);
%   multiplyfactor = 10;
%   Channel = repelem(Channel, 1, multiplyfactor);
% 
gamma = 0.9; % discount factor
T = 500;     % number of episodes simulated and plotted
H = 500;     % horizon value passed to the policy functions

% Success probability per user and sample.
% The former per-episode averaging loop indexed
% Channel(i, (t-1)*H+1 : t*H), which runs past the 2200 stored samples as
% soon as t*H > 2200 (t = 5) and aborted the script; its result was also
% overwritten immediately by the assignment below, so it has been removed.
actualprob = Channel;

% Overview of the first T samples of all six users in one axes.
figure(1)
plot(1:T, actualprob(1, 1:T), 'o-', ...
     1:T, actualprob(2, 1:T), 'x-', ...
     1:T, actualprob(3, 1:T), 'd--', ...
     1:T, actualprob(4, 1:T), 's--', ...
     1:T, actualprob(5, 1:T), '+-', ...
     1:T, actualprob(6, 1:T), ':', ...   % was row 5 again; legend says user6
     'MarkerSize', 10)
legend('user1', 'user2', 'user3', 'user4', 'user5', 'user6')
% plot(1:T, actualprob(6, :), 'o-')
xlabel('Episode'), ylabel('Success Probability')


% Separate figure for the per-user panels. Reusing figure(1) here made
% subplot() delete the overview axes drawn earlier in the script.
figure(3);

% Define common plotting options
marker_styles = {'k-', 'k-', 'k-', 'k-', 'k-', 'k-'};
user_labels = {'User 1', 'User 2', 'User 3', 'User 4', 'User 5', 'User 6'};

for n = 1:6 % Loop through all six users
    subplot(2, 3, n); % 2 rows x 3 columns, plotting in the nth panel

    % Plot the first T samples for the current user
    plot(1:T, actualprob(n, 1:T), marker_styles{n}, 'MarkerSize', 6);

    % Uncomment to pin the Y-axis to [0, 1]
    %ylim([0, 1]);

    title(user_labels{n});

    % Every panel gets an X-axis label
    xlabel('Episode');

    % Label the Y-axis only in the left column (panels 1 and 4)
    if mod(n, 3) == 1
        ylabel('q_n(t)');
    end

end


% Step-to-step change of each user's success probability over the first T
% samples, and its average per user.
variation = zeros(user, T-1);
epsilon = zeros(1, user);

for u = 1:user
    nextStep = actualprob(u, 2:T);
    prevStep = actualprob(u, 1:T-1);
    variation(u, :) = abs(nextStep - prevStep);
    epsilon(u) = sum(variation(u, :)) / (T-1);   % mean absolute change
end

figure(2);

% Shared line style and panel titles
marker_styles = {'k-', 'k-', 'k-', 'k-', 'k-', 'k-'};
user_labels = {'User 1', 'User 2', 'User 3', 'User 4', 'User 5', 'User 6'};

for n = 1:6
    % 2 rows x 3 columns of panels, one per user
    subplot(2, 3, n);

    plot(1:T-1, variation(n, 1:T-1), marker_styles{n}, 'MarkerSize', 6);

    % Uncomment to pin the Y-axis to [0, 1]
    %ylim([0, 1]);

    title(user_labels{n});

    % Every panel gets an X-axis label
    xlabel('Episode');

    % Y-axis label only in the left column (panels 1 and 4)
    isLeftColumn = (mod(n, 3) == 1);
    if isLeftColumn
        ylabel('\epsilon_n(t)');
    end
end


iteration = 5; % number of independent simulation runs

% Arguments shared (in this order) by most policy functions.
sharedArgs = {user, M, Channel, T, H, gamma, actualprob, w};

% Row `run` of each result matrix holds the T per-episode values returned
% by the corresponding policy. The call order is kept fixed because the
% policies may draw from the shared random number stream.
for run = 1:iteration
    R(run, 1:T)         = WhittlePolicy(sharedArgs{:});
    R1(run, 1:T)        = OurPolicy1(sharedArgs{:}, epsilon);
    R2(run, 1:T)        = OurPolicy2(sharedArgs{:});
    Rrandom(run, 1:T)   = randomPolicy(sharedArgs{:});
    RUC(run, 1:T)       = UCWhittle1(sharedArgs{:});
    RUCWindow(run, 1:T) = UCWhittleWindow(sharedArgs{:});
    % WIQL uses a different argument order and does not take Channel.
    RWIQL(run, 1:T)     = WIQL(user, M, actualprob, w, T, H, gamma);
end

% Per-run, per-episode absolute gap between the WhittlePolicy result R and
% each other policy. All matrices are iteration-by-T, so the element-wise
% form replaces the former double loop.
Reward         = abs(R - R1);         % OurPolicy1
Reward1        = abs(R - R2);         % OurPolicy2
RewardUC       = abs(R - RUC);        % UCWhittle1
RewardUCWindow = abs(R - RUCWindow);  % UCWhittleWindow

% These two previously used max(x) on a scalar, which returns x unchanged
% and therefore produced a signed gap, unlike the four lines above. abs()
% makes all six gaps consistent.
% NOTE(review): if a signed or clipped gap (max(x, 0)) was intended for
% these baselines, adjust here.
Rewarduni      = abs(R - Rrandom);    % randomPolicy
RewardWiQL     = abs(R - RWIQL);      % WIQL

% Episodes at which cumulative regret is reported.
episode = [1, 100:100:500];

% Half-width of a normal-approximation 95% interval over the runs.
halfWidth95 = @(perRun) 1.96 * std(perRun) / sqrt(iteration);

for e = 1:numel(episode)
    horizon = episode(e);
    runs = 1:iteration;

    % Cumulative gap up to `horizon`, one entry per run.
    Regretiter         = arrayfun(@(r) sum(Reward(r, 1:horizon)), runs);
    Regretiter1        = arrayfun(@(r) sum(Reward1(r, 1:horizon)), runs);
    RegretiterUC       = arrayfun(@(r) sum(RewardUC(r, 1:horizon)), runs);
    RegretiterUCWindow = arrayfun(@(r) sum(RewardUCWindow(r, 1:horizon)), runs);
    Regretiteruni      = arrayfun(@(r) sum(Rewarduni(r, 1:horizon)), runs);
    RegretiterWIQL     = arrayfun(@(r) sum(RewardWiQL(r, 1:horizon)), runs);

    % Mean over runs and the matching confidence half-width.
    Regret(e)                   = mean(Regretiter);
    Regretconfidence(e)         = halfWidth95(Regretiter);

    Regret1(e)                  = mean(Regretiter1);
    Regretconfidence1(e)        = halfWidth95(Regretiter1);

    RegretUC(e)                 = mean(RegretiterUC);
    RegretUCconfidence(e)       = halfWidth95(RegretiterUC);

    RegretUCWindow(e)           = mean(RegretiterUCWindow);
    RegretUCWindowconfidence(e) = halfWidth95(RegretiterUCWindow);

    RegretWIQL(e)               = mean(RegretiterWIQL);
    RegretWIQLconfidence(e)     = halfWidth95(RegretiterWIQL);

    Regretuni(e)                = mean(Regretiteruni);
    Regretuniconfidence(e)      = halfWidth95(Regretiteruni);
end

% Cumulative regret at evenly spaced checkpoints, averaged over runs.
%
% Changes from the previous version of this loop:
%  * It no longer writes into Regret and RegretUC. Those hold the
%    run-averaged values computed above, and overwriting their first
%    entries corrupted the RegretUC curve plotted below.
%  * R, R2 and Rrandom are iteration-by-T matrices; indexing them as
%    R(1:n) walked down the columns and mixed different runs. Rows (runs)
%    and columns (episodes) are now addressed explicitly.
%  * The vector is no longer called `range`, which shadowed the function of
%    the same name.
checkpoints = 1:100:T;
Regret2 = zeros(1, numel(checkpoints));
Regretrandom = zeros(1, numel(checkpoints));
for k = 1:numel(checkpoints)
    span = 1:checkpoints(k);
    % Sum over episodes within each run, then average across runs.
    Regret2(k)      = mean(sum(abs(R(:, span) - R2(:, span)), 2));
    Regretrandom(k) = mean(sum(abs(R(:, span) - Rrandom(:, span)), 2));
end


% Cumulative regret versus episode on a logarithmic Y-axis.
% A dedicated figure is opened so the curves are not drawn into the last
% subplot of the previous figure.
figure(4);

% The fourth series is the UCWhittleWindow regret (see the legend); the
% previously referenced RegretUC1 is never defined in this script.
% NOTE(review): Regret1 is accumulated from abs(R - R2), i.e. OurPolicy2,
% while the legend entry reads 'OurPolicy1' (that regret is stored in
% Regret). Confirm which of the two is meant.
semilogy(episode, Regretuni, 'b-.', ...
         episode, RegretUC, 'rx-', ...
         episode, RegretWIQL, 'ms--', ...
         episode, RegretUCWindow, 'ro-', ...
         episode, Regret1, 'k-');
xlabel('Episode'), ylabel('Reg(T)')
legend('Random', 'UCWhittle','WIQL','UCWhittleWindow', 'OurPolicy1')