function Reward=OurPolicy1(user, channel, P, w, T, H, gamma)
%OURPOLICY1 Whittle-index scheduling with learned success probabilities.
%
%   Reward = OurPolicy1(user, channel, P, w, T, H, gamma) runs T episodes of
%   H steps each. At every step the `channel` users with the largest
%   WhittleIndex values are scheduled; a scheduled user i succeeds in
%   episode t when rand <= P(i,t), which resets its AoI to 1, otherwise its
%   AoI grows by 1.
%
%   Inputs
%     user    - number of users
%     channel - number of users scheduled per step (k passed to maxk)
%     P       - matrix indexed as P(i,t); compared against rand to decide
%               whether a scheduled transmission succeeds
%     w       - per-user value forwarded to WhittleIndex and AoIfunction
%     T       - number of episodes
%     H       - number of steps per episode
%     gamma   - discount factor applied as gamma^(h-1) within an episode
%
%   Output
%     Reward  - 1-by-T vector; Reward(t) is the discounted sum over steps
%               and users of AoIfunction(w(i), AoI(i)) in episode t.
%
%   Users with index i > user/2 share one success-probability estimate
%   built from all episodes so far; the remaining users share an estimate
%   built from a sliding window of recent episodes (presumably the
%   stationary / non-stationary groups - confirm with the caller). The
%   window length is ceil(1/epsilon), where epsilon is drawn each episode
%   from a fixed candidate set by an exponential-weights (EXP3-style) rule.
%
%   Requires WhittleIndex and AoIfunction on the path, and maxk (R2017b+).

Reward=zeros(1,T);

% Per-episode success/failure counts for the windowed user group.
% These must be 1-by-T vectors: zeros(T) would allocate a T-by-T matrix.
nonFail=zeros(1,T);
nonSucc=zeros(1,T);
% The all-history group only ever needs its running totals.
staFail=0;
staSucc=0;
% Two-episode empirical success rate of the windowed group (drift check).
probnonemp=zeros(1,T);

% Candidate epsilon values. Not named `eps`, which would shadow the
% MATLAB built-in of the same name.
epsSet=0.1:0.1:1;
nArms=numel(epsSet);
weight=ones(1,nArms);       % exponential-weights state, one per candidate
alpha=0.1;                  % uniform-exploration mixing rate
Pest=0.5*ones(1,user);      % initial success-probability estimates
eta=0.05;
num=2*log10(user*T/eta);    % numerator of the confidence bonus num/den

for t=1:T
    %% Window
    epsProb=(1-alpha)*weight/sum(weight)+alpha/nArms;
    arm=find(rand<cumsum(epsProb),1,'first');
    if isempty(arm)
        % Rounding can leave the last cumulative sum marginally below the
        % draw; fall back to the last arm instead of propagating [].
        arm=nArms;
    end
    epsilon=epsSet(arm);
    win=ceil(1/epsilon);

    %% Episode
    AoI=zeros(1,user);
    W=zeros(1,user);
    for h=1:H
        for i=1:user
            W(i)=WhittleIndex(w(i), AoI(i), Pest(i));
        end
        [~, index]=maxk(W, channel);
        a=zeros(1,user);
        a(index)=1;

        for i=1:user
            if a(i)==1
                % rand is drawn only for scheduled users.
                if rand<=P(i,t)
                    if i>user/2
                        staSucc=staSucc+1;
                    else
                        nonSucc(t)=nonSucc(t)+1;
                    end
                    AoI(i)=1;
                else
                    if i>user/2
                        staFail=staFail+1;
                    else
                        nonFail(t)=nonFail(t)+1;
                    end
                    AoI(i)=AoI(i)+1;
                end
            else
                AoI(i)=AoI(i)+1;
            end
        end

        discount=gamma^(h-1);
        for i=1:user
            % NOTE(review): AoIfunction is evaluated once per user here
            % (the original evaluated it twice for an unused accumulator);
            % this assumes AoIfunction has no side effects - confirm.
            Reward(t)=Reward(t)+discount*AoIfunction(w(i), AoI(i));
        end
    end

    %% Estimate for the all-history group
    staTotal=staSucc+staFail;
    if staTotal>0
        probsta=staSucc/staTotal;
    else
        % No observations yet: use 1, the value the original obtained
        % implicitly from min(0/0, 1).
        probsta=1;
    end
    probsta=min(probsta+num/max(staTotal,1), 1);

    %% Drift check on the windowed group (two-episode empirical rate)
    epsEff=epsilon;   % epsilon used in the bonus; may be raised below
    if t>1
        succ2=sum(nonSucc(t-1:t));
        fail2=sum(nonFail(t-1:t));
        if succ2+fail2>0
            probnonemp(t)=succ2/(succ2+fail2);
        else
            probnonemp(t)=1;   % same no-data convention as above
        end
    end
    if t>2
        epsEff=max(epsilon, 2*abs(probnonemp(t)-probnonemp(t-1)));
    end

    %% Sliding-window estimate for the windowed group
    lw=max(t-win+1,1);
    succW=sum(nonSucc(lw:t));
    failW=sum(nonFail(lw:t));
    totalW=succW+failW;
    if totalW>0
        probnon=succW/totalW;
    else
        probnon=1;             % same no-data convention as above
    end
    %probnon=min(probnon+num/den+epsilon*win, 1);
    probnon=min(probnon+num/max(totalW,1)+epsEff*win/2, 1);

    for i=1:user
        if i>user/2
            Pest(i)=probsta;
        else
            Pest(i)=probnon;
        end
    end

    %% Exponential-weights update
    % Update the arm that was actually drawn, using its draw probability.
    % The original located the arm with eps(i)==epsilon after epsilon could
    % already have been overwritten by the drift check, which either
    % skipped the update or credited an arm that was not drawn.
    % NOTE(review): Rx is not visibly bounded, so weight can grow without
    % limit over many episodes - confirm the range of AoIfunction.
    Rx=Reward(t)/(H*user);
    weight(arm)=weight(arm)*exp((alpha*Rx)/(epsProb(arm)*nArms));
end