function Reward=WhittlePolicy(user, M,  Channel, T, H, gamma, actualprob, w)
%WHITTLEPOLICY Simulate an index-based top-M scheduling policy over T episodes.
%
%   Reward = WhittlePolicy(user, M, Channel, T, H, gamma, actualprob, w)
%
%   For every episode t = 1..T the per-user AoI vector is reset to zero and
%   H slots are simulated. In each slot an index is computed for every user
%   with WhittleIndex(AoI(i), w(i), actualprob(i, t)), the M users with the
%   largest indices are selected (maxk), and each selected user's AoI is
%   reset to 1 when a uniform random draw is <= actualprob(i, t); every
%   other user's AoI is incremented by 1. After the update, the slot
%   contributes gamma^(h-1) * AoIfunction(AoI(i), w(i)) for every user to
%   Reward(t).
%
%   Inputs:
%     user       - number of users (length of the AoI / index vectors).
%     M          - number of users selected per slot.
%     Channel    - not used by this function; kept so existing callers
%                  do not break.
%     T          - number of episodes.
%     H          - number of slots per episode.
%     gamma      - discount base applied as gamma^(h-1) within an episode.
%     actualprob - indexed as actualprob(i, t): threshold the random draw is
%                  compared against for user i in episode t.
%     w          - indexed as w(i): per-user value forwarded to WhittleIndex
%                  and AoIfunction (presumably a weight - confirm with those
%                  helpers).
%
%   Output:
%     Reward     - 1-by-T vector; Reward(t) is the accumulated discounted
%                  AoIfunction value of episode t, summed over all users.
%
%   NOTE(review): actualprob is indexed by the episode t, not by the slot.
%   The previous revision computed a global slot index (t-1)*H+h but never
%   used it - confirm that per-episode probabilities are intended.

Reward=zeros(1, T);

% Preallocated once; every entry is overwritten in each slot.
Whittle=zeros(1, user);

for t=1:T
    %% Episode: AoI starts from zero for every user
    AoI=zeros(1, user);

    for h=1:H
        % Index of every user for the current AoI state
        for i=1:user
            Whittle(i)=WhittleIndex(AoI(i), w(i), actualprob(i, t));
        end

        % Select the M users with the largest index
        [~, index]=maxk(Whittle, M);
        scheduled=false(1, user);
        scheduled(index)=true;

        % AoI transition. The short-circuit && makes sure rand is drawn
        % only for selected users, in increasing user order, so results
        % for a fixed random seed are unchanged.
        for i=1:user
            if scheduled(i) && rand<=actualprob(i, t)
                AoI(i)=1;
            else
                AoI(i)=AoI(i)+1;
            end
        end

        % The discount depends only on the slot, so compute it once.
        discount=gamma^(h-1);

        % Accumulate user by user (same summation order as before).
        for i=1:user
            Reward(t)=Reward(t)+discount*AoIfunction(AoI(i), w(i));
        end
    end
end




