import numpy as np
import math
import preliminary
def exploration(arm,agent,arm_preference,u,T,time,value,M,t_total,t_total_collision,reward_0):
    """Run the round-robin exploration phase, updating the statistics in place.

    For ``len(arm) * ceil(log2(T))`` rounds, the agent at position ``p`` of
    ``agent`` pulls ``arm[(p + t) % len(arm)]`` in round ``t``.  After each
    pull the agent's running mean, pull counts and collision-free counts are
    updated.  Whenever the global round counter reaches a multiple of
    100000, a checkpoint is written into ``reward_0``.

    Args:
        arm: 1-D sequence of arm indices to cycle through.
        agent: 1-D sequence of agent indices taking part in this phase.
        arm_preference: Forwarded unchanged to
            ``preliminary.collision_indicator``.
        u: 2-D array indexed ``[agent, arm]``; running mean of the observed
            reward (updated in place).
        T: Positive number whose base-2 logarithm sets the number of sweeps
            (presumably the time horizon -- confirm with the caller).
        time: 2-D array indexed ``[agent, arm]``; number of samples behind
            each entry of ``u`` (updated in place).
        value: 2-D array indexed ``[agent, arm]``; passed to
            ``preliminary.Bernoulli`` and used as the weight in the
            checkpoint sum (presumably the true mean reward -- confirm).
        M: Length of the per-round pull vector and number of agent rows
            included in the checkpoint sum.
        t_total: 2-D array indexed ``[agent, arm]``; incremented by
            ``1 - collision_indicator`` on every pull (updated in place).
        t_total_collision: 2-D array indexed ``[agent, arm]``; incremented
            on every pull.  Its row 0 is summed on entry to initialise the
            global round counter.
        reward_0: 1-D array receiving the checkpoint value at index
            ``round_counter / 100000`` (updated in place).

    Returns:
        The tuple ``(u, time, t_total, t_total_collision, reward_0)``; these
        are the same objects that were passed in.
    """
    n_arms = int(np.shape(arm)[0])
    n_active = int(np.shape(agent)[0])
    pull = np.zeros(M, int)
    # math.log2 is exact for powers of two, whereas math.log(T, 2) may land
    # just above the integer and make ceil() schedule a spurious extra sweep.
    sweeps = math.ceil(math.log2(T))

    # Global round counter: total number of pulls already recorded in row 0.
    n_cols = np.shape(t_total_collision)[1]
    t_0 = 0
    for col in range(n_cols):
        t_0 = t_0 + t_total_collision[0, col]

    for t in range(n_arms * sweeps):
        # Round-robin assignment: shift every agent by one arm each round.
        for pos in range(n_active):
            pull[agent[pos]] = arm[(pos + t) % n_arms]
        t_0 = t_0 + 1

        for i in agent:
            k = pull[i]
            draw = preliminary.Bernoulli(value[i, k])
            # Evaluate the collision once per pull so the reward and the
            # collision-free counter are always based on the same outcome.
            no_collision = 1 - preliminary.collision_indicator(i, arm_preference, pull, M, agent)
            reward = draw * no_collision
            u[i, k] = (u[i, k] * time[i, k] + reward) / (time[i, k] + 1)
            time[i, k] = time[i, k] + 1
            t_total[i, k] = t_total[i, k] + no_collision
            t_total_collision[i, k] = t_total_collision[i, k] + 1

        if t_0 % 100000 == 0:
            # NOTE(review): the sum covers columns 0..len(arm)-1, not the
            # entries of ``arm``; confirm ``arm`` is always the full arm set.
            earn = 0
            for row in range(M):
                for col in range(n_arms):
                    earn = t_total[row, col] * value[row, col] + earn
            nod = int(t_0 / 100000)
            reward_0[nod] = earn
    return u,time,t_total,t_total_collision,reward_0
def whether_success(j,u,time,arm,T):
    """Report whether agent ``j`` can separate every pair of arms in ``arm``.

    Each arm ``k`` gets the interval ``u[j, k] +/- sqrt(2 * ln(T) / time[j, k])``.
    The function returns 1 when, for every ordered pair of distinct arms,
    the interval of the arm with the larger mean lies strictly above the
    interval of the other one; it returns 0 as soon as one pair fails.

    Args:
        j: Row index of the agent in ``u`` and ``time``.
        u: 2-D array indexed ``[agent, arm]`` with the empirical means.
        time: 2-D array indexed ``[agent, arm]`` with the sample counts.
        arm: Iterable of arm indices to compare.
        T: Positive number whose natural logarithm scales the radius.

    Returns:
        1 if all pairs are separated (also for fewer than two arms), else 0.
    """
    scaled_log = 2 * math.log(T)

    def radius(k):
        # Half-width of the interval for arm k.
        return (scaled_log / time[j, k]) ** 0.5

    def lower(k):
        return u[j, k] - radius(k)

    def upper(k):
        return u[j, k] + radius(k)

    for first in arm:
        for second in arm:
            if first == second:
                continue
            if u[j, first] > u[j, second]:
                # `first` looks better: its lower bound must clear the
                # upper bound of `second`.
                overlapping = lower(first) < upper(second)
            else:
                # `second` looks at least as good: its lower bound must
                # clear the upper bound of `first`.
                overlapping = upper(first) > lower(second)
            if overlapping:
                return 0
    return 1