function Reward=OurPolicy(user, channel, P, w, T, H, gamma, epsilon, win)
%OURPOLICY Simulate an index-based scheduling policy with learned success rates.
%   Reward = OurPolicy(user, channel, P, w, T, H, gamma, epsilon, win)
%   runs T outer rounds of H slots each. In every slot the `channel` users
%   with the largest WhittleIndex values are scheduled; a scheduled user i
%   succeeds in round t when rand <= P(i,t). After each round the success
%   probability estimates fed to WhittleIndex are refreshed from the
%   observed success/failure counts.
%
%   Inputs
%     user     number of users.
%     channel  number of users scheduled per slot (k passed to maxk).
%     P        matrix indexed as P(i,t); compared against rand to decide
%              whether a scheduled transmission succeeds.
%     w        per-user value forwarded to WhittleIndex and AoIfunction
%              (presumably a weight -- confirm against those helpers).
%     T        number of outer rounds.
%     H        number of slots per round.
%     gamma    discount factor applied as gamma^(h-1) within a round.
%     epsilon  scales the extra bonus epsilon*win/2 added to the windowed
%              estimate.
%     win      length (in rounds) of the sliding window used for the
%              first-half users.
%
%   Output
%     Reward   1-by-T vector; Reward(t) is the discounted sum over slots
%              and users of AoIfunction(w(i), AoI(i)) in round t.
%
%   Users with index i > user/2 are estimated from all rounds 1..t; the
%   remaining users are estimated from the last `win` rounds only
%   (NOTE(review): the "sta"/"non" naming suggests stationary vs.
%   non-stationary channels -- confirm with the caller).
%
%   Requires WhittleIndex and AoIfunction on the path, and maxk (R2017b+).

Reward=zeros(1,T);
% Per-round attempt counters. These are 1-by-T vectors: zeros(T) would
% allocate T-by-T matrices of which only the first T entries are used.
countnon0=zeros(1,T);   % failures,  users i <= user/2
countnon1=zeros(1,T);   % successes, users i <= user/2
count0=zeros(1,T);      % failures,  users i >  user/2
count1=zeros(1,T);      % successes, users i >  user/2

Pest=0.5*ones(1,user);  % initial success-probability estimate for every user
W=zeros(1,user);        % index values, preallocated once and overwritten each slot
eta=0.05;
num=2*log10(user*T/eta);  % numerator of the optimistic bonus num/den

for t=1:T
    AoI=zeros(1, user);   % the age vector is reset at the start of every round
    for h=1:H
        % Rank users by their current index and schedule the top `channel`.
        for i=1:user
            W(i)=WhittleIndex(w(i), AoI(i), Pest(i));
        end
        [~, index]=maxk(W, channel);
        a=zeros(1, user);
        a(index)=1;

        % Resolve transmissions. rand is drawn only for scheduled users, in
        % increasing user order, so results are reproducible for a fixed seed.
        for i=1:user
            if a(i)==1
                secondHalf=(i>user/2);
                if rand<=P(i, t)
                    if secondHalf
                        count1(t)=count1(t)+1;
                    else
                        countnon1(t)=countnon1(t)+1;
                    end
                    AoI(i)=1;
                else
                    if secondHalf
                        count0(t)=count0(t)+1;
                    else
                        countnon0(t)=countnon0(t)+1;
                    end
                    AoI(i)=AoI(i)+1;
                end
            else
                AoI(i)=AoI(i)+1;
            end
        end

        % Accumulate the discounted per-slot value over all users.
        for i=1:user
            Reward(t)=Reward(t)+gamma^(h-1)*AoIfunction(w(i), AoI(i));
        end
    end

    % Estimate for users i > user/2: empirical rate over all rounds so far
    % plus the bonus num/den, clipped to 1.
    C0sta=sum(count0(1:t));
    C1sta=sum(count1(1:t));
    den=max(C1sta+C0sta,1);
    % Divide by the guarded denominator so that zero attempts yields 0
    % instead of 0/0 = NaN (which previously only worked because min
    % ignores NaN).
    probsta=min(C1sta/den, 1);
    probsta=min(probsta+num/den, 1);

    % Estimate for users i <= user/2: same construction restricted to the
    % last `win` rounds, with the additional epsilon*win/2 term.
    lw=max(t-win+1,1);
    C0non=sum(countnon0(lw:t));
    C1non=sum(countnon1(lw:t));
    den=max(C1non+C0non,1);
    probnon=min(C1non/den, 1);
    probnon=min(probnon+num/den+epsilon*win/2, 1);

    for i=1:user
        if i>user/2
          Pest(i)=probsta;
        else
          Pest(i)=probnon;
        end
    end
end 
