function Reward=WIQL1(N, M, P0, P0next, P0prev, P1, P1next, P1prev, H, gamma, R, T)
% WIQL1  Whittle-index-based Q-learning on N arms, activating M arms per step.
%
%   Reward = WIQL1(N, M, P0, P0next, P0prev, P1, P1next, P1prev, H, gamma, R, T)
%
%   Each arm holds an integer state in 1..length(R). At every step an arm
%   either keeps its state, moves up by one, or moves down by one; moves
%   that would leave 1..length(R) are clamped (the arm stays put).
%
%   Inputs
%     N       number of arms
%     M       number of arms activated at every step
%     P0      P0(i,t): probability that passive arm i keeps its state in
%             episode t
%     P0next  P0next(i,t): probability that passive arm i moves up one state
%     P0prev  unused (the move-down probability is the remaining mass)
%     P1, P1next, P1prev
%             same as above for an active arm (P1prev is unused)
%     H       number of steps per episode
%     gamma   discount factor
%     R       R(s) is the reward associated with state s
%     T       number of episodes
%
%   Output
%     Reward  1-by-T vector; Reward(t) is the discounted sum over the H
%             steps of episode t of the rewards of the post-transition
%             states of all arms.
%
%   Q-values, Whittle-index estimates and visit counts persist across
%   episodes; every episode restarts all arms in state 1.
%
%   NOTE(review): the transition matrices are indexed by the episode number
%   t, not by the current state -- confirm this is the intended model.

S=length(R);                 % number of states per arm
Reward=zeros(1,T);
count=zeros(N, S, 2);        % visits of (arm, state, action+1)
Whittle=zeros(N, S);         % estimated index per (arm, state)
Q=zeros(N, S, 2);            % Q(arm, state, action+1)
alpha=0.9;                   % base of the decaying step size alpha^count

for t=1:T
    state=ones(1, N);        % every episode starts with all arms in state 1
    for h=1:H
        e=N/(N+h);           % exploration probability, decays within an episode
        if rand<=e
            % Explore: activate M arms chosen uniformly at random.
            index=randperm(N, M);
        else
            % Exploit: activate the M arms with the largest estimated index
            % at their current state.
            W=Whittle(sub2ind(size(Whittle), 1:N, state));
            [~, index]=maxk(W, M);
        end
        a=zeros(1, N);
        a(index)=1;

        Z=state;             % states in which the actions are taken
        for i=1:N
            if a(i)==1
                pStay=P1(i, t);
                pNext=P1next(i, t);
            else
                pStay=P0(i, t);
                pNext=P0next(i, t);
            end
            u=rand;
            if u<=pStay
                % arm keeps its state
            elseif u<=pStay+pNext
                state(i)=min(Z(i)+1, S);   % move up, clamped at the top state
            else
                state(i)=max(Z(i)-1, 1);   % move down, clamped at state 1
            end
            % Count the visit of the (state, action) pair whose Q-value is
            % updated below, so that the step size decays with the number
            % of updates of that very pair.
            count(i, Z(i), a(i)+1)=count(i, Z(i), a(i)+1)+1;
        end

        Reward(t)=Reward(t)+gamma^(h-1)*sum(R(state));

        for i=1:N
            act=a(i)+1;
            V=max(Q(i, state(i), 1), Q(i, state(i), 2));
            lr=alpha^count(i, Z(i), act);
            Q(i, Z(i), act)=(1-lr)*Q(i, Z(i), act)+lr*(R(Z(i))+gamma*V);
            % Index estimate: advantage of being active over being passive.
            Whittle(i, Z(i))=Q(i, Z(i), 2)-Q(i, Z(i), 1);
        end
    end
end
end