function Reward=WIQL(user, channel, P, w, T, H, gamma, epsilon)
%WIQL Whittle-index-based Q-learning scheduler run for T episodes of H steps.
%
%   Reward = WIQL(user, channel, P, w, T, H, gamma, epsilon)
%
%   Inputs:
%     user    - number of users (arms); tables are sized user-by-AoIBound.
%     channel - number of users scheduled (action 1) in every step.
%     P       - indexed as P(i, t): a scheduled user i in episode t has its
%               AoI reset to 1 when rand <= P(i, t), otherwise it grows by 1.
%     w       - per-user value forwarded as first argument to AoIfunction
%               (presumably a weight -- confirm against AoIfunction).
%     T       - number of episodes; AoI is reset to all ones at each episode.
%     H       - number of steps per episode.
%     gamma   - discount factor used both for the episode reward and the
%               Q-learning bootstrap term.
%     epsilon - currently unused; the exploration probability is computed
%               internally as user/(user+h). Kept for interface compatibility.
%
%   Output:
%     Reward  - 1-by-T vector; Reward(t) accumulates
%               gamma^(h-1)*AoIfunction(w(i), AoI(i)) over all steps h and
%               users i, evaluated on the post-transition AoI.
%
%   Q, Whittle and the visit counters persist across episodes. States with
%   AoI above AoIBound are not learned; they reuse the AoIBound entry when
%   ranking users and when bootstrapping.
%
%   Requires AoIfunction (defined elsewhere) on the MATLAB path.

Reward=zeros(1,T);
AoIBound=30;                       % largest AoI with its own table entry
count=zeros(user, AoIBound, 2);    % visits of (user, state, action+1)
Whittle=zeros(user, AoIBound);     % estimated index: Q(active)-Q(passive)
Q=zeros(user, AoIBound, 2);
alpha=0.9;                         % base of the decaying step size alpha^count
for t=1:T
    AoI=ones(1, user);
    for h=1:H
        % Exploration probability decays with the step index inside the episode.
        e=user/(user+h);
        a=zeros(1, user);
        if rand<=e
            % Explore: schedule a uniformly random subset of users.
            index=randperm(user, channel);
        else
            % Exploit: schedule the users with the largest current index,
            % clamping the state to the last learned entry.
            W=zeros(1, user);
            for i=1:user
                W(i)=Whittle(i, min(AoI(i), AoIBound));
            end
            [~, index]=maxk(W, channel);
        end
        a(index)=1;
        Z=AoI;                     % pre-transition state of every user
        for i=1:user
            act=a(i)+1;            % 1 = passive, 2 = active (table index)

            % State transition: only a scheduled user draws a random number.
            if a(i)==1 && rand<=P(i, t)
                AoI(i)=1;
            else
                AoI(i)=AoI(i)+1;
            end

            Reward(t)=Reward(t)+gamma^(h-1)*AoIfunction(w(i), AoI(i));

            if Z(i)<=AoIBound
                % Count the state-action pair that was actually visited, so
                % the step size below reflects visits of (Z(i), a(i)).
                count(i, Z(i), act)=count(i, Z(i), act)+1;

                % Bootstrap from the next state, clamped to the table.
                nextS=min(AoI(i), AoIBound);
                V=max(Q(i, nextS, 1), Q(i, nextS, 2));

                beta=count(i, Z(i), act);
                Q(i, Z(i), act)=(1-alpha^beta)*Q(i, Z(i), act)+(alpha^beta)*(AoIfunction(w(i), Z(i))+gamma*V);

                Whittle(i, Z(i))=Q(i, Z(i), 2)-Q(i, Z(i), 1);
            end
        end
    end
end