import numpy as np
import exploration
import preliminary as pre
import initialization
import math
def GS(optimal,u,preference,t_total,t_total_collision,M,K,agent):
    """Play one commit round in which every agent pulls its current candidate arm.

    For each agent ``a`` the arms are ranked by descending ``u[a, :]`` and the
    arm at rank ``optimal[a]`` is pulled. ``pre.collision_indicator`` is then
    queried per agent with the full pull vector:

    * result ``1``  -> ``optimal[a]`` is advanced to the next rank;
    * anything else -> ``t_total[a, arm]`` is incremented.

    ``t_total_collision[a, arm]`` is incremented in both cases.

    ``optimal``, ``t_total`` and ``t_total_collision`` are modified in place
    and also returned, in that order.
    """
    ranking = np.empty((M, K), int)
    chosen = np.zeros(M, int)

    # All choices must be fixed before any collision is evaluated, because
    # the indicator receives the complete pull vector.
    for a in range(M):
        ranking[a, :] = u[a, :].argsort()[::-1]
        chosen[a] = ranking[a, optimal[a]]

    for a in range(M):
        arm = chosen[a]
        rejected = pre.collision_indicator(a, preference, chosen, M, agent) == 1
        if rejected:
            optimal[a] += 1
        else:
            t_total[a, arm] += 1
        t_total_collision[a, arm] += 1

    return optimal, t_total, t_total_collision
def Phased_ETC(K,M,T,preference,value,E,record_every=100000):
    """Simulate a phased explore-then-commit run and return sampled regret.

    Rounds ``t`` are grouped into phases ``p = floor(log2(t))``, so phase
    ``p`` starts at ``t = 2**p``. The first ``K * int(p**E)`` rounds of a
    phase (plus one, because the test below is ``<=``) are exploration
    rounds: agent ``i`` pulls arm ``(t + i - 2**p + 2) % K`` and updates its
    running mean ``u[i, arm]`` with a Bernoulli reward that is zeroed on
    collision. All remaining rounds of the phase delegate to ``GS``.

    Parameters
    ----------
    K, M : int
        Number of arms and number of agents (array dimensions used below).
    T : int
        Horizon; rounds ``max(M, 1) .. T-1`` are simulated.
    preference, value :
        Passed through to the ``pre`` / ``initialization`` helpers;
        ``value[i, k]`` is also used as the Bernoulli parameter and as the
        per-pull payoff in the regret computation.
    E :
        Exponent controlling the exploration length per phase.
    record_every : int, optional
        Regret is sampled whenever ``(t + 1)`` is a multiple of this value.
        Must be positive. Defaults to the previously hard-coded 100000.

    Returns
    -------
    numpy.ndarray
        Array of length ``int(T / record_every) + 1``. Entry ``n`` holds
        ``n * record_every * sum_i value[i, match[i]]
        - sum_{i,k} t_total[i, k] * value[i, k]`` evaluated at round
        ``t + 1 == n * record_every``; entry 0 is never written and stays 0.
    """
    t_total = np.zeros((M, K), int)
    t_total_collision = np.zeros((M, K), int)
    u = np.zeros((M, K))
    agent = list(np.arange(M))
    # Reference matching used as the regret benchmark below.
    match = pre.stable_matching(agent, value, preference, M, K)
    time = np.zeros((M, K), int)
    optimal = np.zeros(M, int)
    information = np.zeros((M, K, M), int) - 1
    regret = np.zeros(int(T / record_every) + 1)
    information, t_total, t_total_collision = initialization.index_assignment(
        preference, M, 1, information, t_total, t_total_collision)
    pull = np.zeros(M, int)

    # The loop starts at M (the earlier rounds are presumably consumed by
    # index_assignment -- TODO confirm). log2 is undefined at t == 0, hence
    # the lower bound of 1.
    for t in range(max(M, 1), T):
        # Exact floor(log2(t)); the floating-point quotient log(t)/log(2) is
        # not exact at powers of two.
        phase = t.bit_length() - 1
        phase_start = 2 ** phase
        explore_rounds = K * int(phase ** E)

        if t - phase_start + 1 <= explore_rounds:
            # Exploration restarts the commit stage from everyone's top rank.
            optimal = np.zeros(M, int)
            for i in range(M):
                pull[i] = (t + i - phase_start + 2) % K
            for i in range(M):
                arm = pull[i]
                # Draw first, then query the collision, matching the original
                # evaluation order so any shared RNG stream is unchanged.
                draw = pre.Bernoulli(value[i, arm])
                # Evaluated once per agent and reused for both the reward and
                # the success counter (assumes collision_indicator is a pure
                # function of its arguments -- TODO confirm).
                collided = pre.collision_indicator(i, preference, pull, M, agent)
                reward = draw * (1 - collided)
                u[i, arm] = (u[i, arm] * time[i, arm] + reward) / (time[i, arm] + 1)
                time[i, arm] += 1
                t_total[i, arm] += 1 - collided
                t_total_collision[i, arm] += 1
        else:
            optimal, t_total, t_total_collision = GS(
                optimal, u, preference, t_total, t_total_collision, M, K, agent)

        if (t + 1) % record_every == 0:
            # Expected payoff actually collected from collision-free pulls.
            earn = 0
            for i in range(M):
                for k in range(K):
                    earn += t_total[i, k] * value[i, k]
            # Benchmark: every agent holding its matched arm for t + 1 rounds.
            regret_now = -earn
            for i in range(M):
                regret_now += (t + 1) * value[i, match[i]]
            regret[(t + 1) // record_every] = regret_now
    return regret

