function Reward=UCWhittleWindow(user, M,  Channel, T, H, gamma, actualprob, w)
%UCWHITTLEWINDOW Whittle-index scheduling with windowed optimistic estimates.
%   Reward = UCWhittleWindow(user, M, Channel, T, H, gamma, actualprob, w)
%   runs T episodes of H steps each. In every step the M users with the
%   largest Whittle index (looked up by their current AoI) are activated.
%   After each episode the per-user success probability is re-estimated
%   from a randomly sized window of past episodes, inflated by a confidence
%   bonus, and the transition models fed to WhittleIndexUC are rebuilt.
%
%   Inputs
%     user       number of users (arms)
%     M          number of users activated in each step
%     Channel    unused in this function; kept for interface compatibility
%     T          number of episodes
%     H          number of steps per episode
%     gamma      discount factor applied as gamma^(h-1) within an episode
%     actualprob matrix indexed as actualprob(i, t): success probability of
%                user i in episode t (column 1 seeds the initial model)
%     w          per-user weights, passed to WhittleIndexUC and AoIfunction
%
%   Output
%     Reward     1-by-T vector; Reward(t) is the discounted sum over steps
%                and users of AoIfunction(AoI, w) in episode t

Reward=zeros(1,T);
AoIbound=20;      % number of AoI states in the transition model
maxWindow=500;    % upper bound of the randomly drawn estimation window
explore=0;        % probability of a random activation; 0 disables it
count0=zeros(user, T); %failure event
count1=zeros(user, T); %success event
countnot=zeros(user, T); % not activated
eta=0.05;
% NOTE(review): the bonus below is num/count with a base-10 log; a standard
% UCB radius would be sqrt(log(.)/count) -- confirm this is intended.
num=2*log10(user*T/eta);

% Initial model: the active action resets AoI to 1 with the first-episode
% success probability, otherwise AoI grows by one; the passive action always
% grows AoI by one. At the last state the "grow" mass stays on the boundary
% so that every row is a proper probability distribution.
transprob0=zeros(user, AoIbound, AoIbound);
transprob1=zeros(user, AoIbound, AoIbound);
for u=1:user
    for i=1:AoIbound
        nxt=min(i+1, AoIbound);
        transprob1(u, i, 1)=actualprob(u, 1);
        transprob1(u, i, nxt)=transprob1(u, i, nxt)+(1-actualprob(u, 1));
        transprob0(u, i, nxt)=1;
    end
end

for t=1:T
    fprintf('t = %d\n', t);   % progress output

    %% Whittle indices under the current model
    WhittleValue=zeros(user, AoIbound);
    for u=1:user
        % The per-user matrices do not depend on d, so extract them once.
        p0=reshape(transprob0(u, :, :), AoIbound, AoIbound);
        p1=reshape(transprob1(u, :, :), AoIbound, AoIbound);
        for d=1:AoIbound
            WhittleValue(u, d)=WhittleIndexUC(d, p0, p1, w(u), gamma, AoIbound);
        end
    end

    %% Episode
    AoI=ones(1, user);

    for h=1:H
        a=zeros(1, user);
        Whittle=zeros(1, user);
        for i=1:user
            % AoI itself is not capped; only the index lookup is.
            Whittle(i)=WhittleValue(i, min(AoI(i), AoIbound));
        end

        % The rand draw is kept even with explore=0 so that the random
        % number stream matches the original implementation.
        if rand<explore
            index=randperm(user,M);
        else
            [~, index]=maxk(Whittle, M);
        end
        a(index)=1;

        for i=1:user
            if a(i)==1
                if rand<=actualprob(i, t)
                    count1(i, t)=count1(i, t)+1;
                    AoI(i)=1;
                else
                    count0(i, t)=count0(i, t)+1;
                    AoI(i)=AoI(i)+1;
                end
            else
                AoI(i)=AoI(i)+1;
                countnot(i, t)=countnot(i, t)+1;
            end
        end

        discount=gamma^(h-1);
        for i=1:user
            Reward(t)=Reward(t)+discount*AoIfunction(AoI(i), w(i));
        end
    end

    %% estimate
    % Every user gets its own window, counts and confidence bonus. Counts
    % are floored at 1 so an empty window yields the fully optimistic value
    % instead of NaN/Inf, and every estimate is clamped to [0, 1].
    probUCB=zeros(1, user);       % optimistic success probability (active)
    passiveBonus=zeros(1, user);  % optimistic reset probability (passive)
    for i=1:user
        win=randi([1, maxWindow], 1);
        lw=max(t-win+1, 1);
        C0=sum(count0(i, lw:t));
        C1=sum(count1(i, lw:t));
        den=max(C1+C0, 1);
        denNot=max(sum(countnot(i, lw:t)), 1);
        probUCB(i)=min(C1/den+num/den, 1);
        passiveBonus(i)=min(num/denNot, 1);
    end

    % Rebuild both models from scratch so no stale entries survive and each
    % row sums to one. Failure mass goes to state i+1 (held at AoIbound on
    % the last row), mirroring the initial model.
    % NOTE(review): the passive model keeps the original's optimistic reset
    % probability num/denNot -- confirm that a passive reset is intended.
    transprob0=zeros(user, AoIbound, AoIbound);
    transprob1=zeros(user, AoIbound, AoIbound);
    for u=1:user
        for i=1:AoIbound
            nxt=min(i+1, AoIbound);
            transprob1(u, i, 1)=probUCB(u);
            transprob1(u, i, nxt)=transprob1(u, i, nxt)+(1-probUCB(u));
            transprob0(u, i, 1)=passiveBonus(u);
            transprob0(u, i, nxt)=transprob0(u, i, nxt)+(1-passiveBonus(u));
        end
    end
end
