import preliminary as pre
import numpy as np
import math
def collision_indicator(j, arm_preference, pull, M):
    """Report whether agent ``j`` is beaten on the arm it is pulling.

    Every agent ``i`` in ``range(M)`` whose pull equals ``pull[j]`` is
    compared with ``j`` through ``pre.index`` applied to that arm's row of
    ``arm_preference``.  Agent ``j`` is flagged as soon as any such agent
    gets a strictly smaller index (presumably: a smaller index means the
    arm ranks that agent higher -- confirm against ``pre.index``).

    Args:
        j: Index of the agent being checked.
        arm_preference: 2-D array indexed as ``arm_preference[arm, :]``.
        pull: Sequence giving the arm pulled by each agent this round.
        M: Number of agents to scan.

    Returns:
        ``1`` if some competing agent has a strictly smaller index than
        ``j`` on the shared arm, otherwise ``0``.
    """
    target = pull[j]
    blocked = 0
    for rival in range(M):
        if pull[rival] != target:
            continue
        ranking = arm_preference[target, :]
        # No early exit: every agent sharing the arm is inspected.
        if pre.index(ranking, M, rival) < pre.index(ranking, M, j):
            blocked = 1
    return blocked
def _play_round(pull, result, value, arm_preference, mean, wins, ucb, t0, M):
    """Resolve one round of pulls and update the winners' statistics in place.

    An agent for which ``collision_indicator`` returns 0 is recorded in
    ``result`` as holding its arm, draws a reward via ``pre.Bernoulli`` and
    has its running mean, win count and UCB index refreshed.  Every other
    agent is marked unmatched (``-1``) and its statistics are left untouched.
    """
    for i in range(M):
        if collision_indicator(i, arm_preference, pull, M) != 0:
            result[i] = -1
            continue
        arm = pull[i]
        result[i] = arm
        # The agent is known to be collision-free here, so the reward is the
        # raw draw (no need to re-run the collision check).
        reward = pre.Bernoulli(value[i, arm])
        mean[i, arm] = (mean[i, arm] * wins[i, arm] + reward) / (wins[i, arm] + 1)
        wins[i, arm] = wins[i, arm] + 1
        ucb[i, arm] = mean[i, arm] + ((2 * t0) / wins[i, arm]) ** (1 / 2)


def _plausible_arms(i, holders, free_arms, arm_preference, M):
    """List the arms agent ``i`` may attempt, given last round's outcome.

    Held arms come first (in holder order) and are kept when ``pre.index``
    of ``i`` in the arm's preference row is not larger than that of the
    current holder; arms nobody holds follow in ascending order.
    """
    arms = []
    for j, arm in holders:
        ranking = arm_preference[arm, :]
        if pre.index(ranking, M, i) <= pre.index(ranking, M, j):
            arms.append(arm)
    arms.extend(free_arms)
    return arms


def _regret_vs_match(rounds_played, wins, value, match, M, K):
    """Return ``rounds_played * sum_i value[i, match[i]]`` minus earnings.

    Earnings are ``sum_{i,k} wins[i, k] * value[i, k]``, i.e. the number of
    collision-free pulls weighted by the corresponding ``value`` entry.
    """
    earned = 0
    for i in range(M):
        for k in range(K):
            earned = wins[i, k] * value[i, k] + earned
    gap = 0 - earned
    for i in range(M):
        gap = rounds_played * value[i, match[i]] + gap
    return gap


def CA_UCB(K, M, value, arm_preference, lamda, T, record_interval=100000):
    """Simulate ``T`` rounds of a UCB policy with collision avoidance.

    Round 1 uses uniformly random pulls.  In each later round every agent
    draws ``pre.Bernoulli(lamda)``; on a 0 it re-selects the arm with the
    largest UCB index among its plausible arms (arms unheld after the
    previous round, or held by an agent that does not beat it according to
    ``pre.index`` on the arm's preference row), otherwise it repeats its
    previous pull.  Collisions are resolved with ``collision_indicator`` and
    only collision-free agents observe a reward and update their estimates.

    Args:
        K: Number of arms.
        M: Number of agents.
        value: 2-D array, ``value[i, k]`` is passed to ``pre.Bernoulli`` as
            agent ``i``'s reward parameter for arm ``k`` (presumably the
            success probability -- confirm against ``pre.Bernoulli``).
        arm_preference: 2-D array, row ``k`` is arm ``k``'s preference data
            as consumed by ``pre.index``.
        lamda: Parameter handed to ``pre.Bernoulli`` for the per-agent,
            per-round "keep the previous arm" draw.
        T: Total number of rounds (must be a positive integer).
        record_interval: Regret is recorded every ``record_interval``
            rounds (must be a positive integer).  Defaults to 100000.

    Returns:
        Float array of length ``T // record_interval + 1``.  Entry ``n``
        (``n >= 1``) is the regret after ``n * record_interval`` rounds
        relative to the matching returned by ``pre.stable_matching``;
        entry 0 is never written and stays 0.
    """
    # Keep the random initial pulls ahead of any other call so the order of
    # random draws is unchanged.
    pull = np.random.randint(K, size=M)
    t0 = math.log(T)
    result = np.full(M, -1, dtype=int)      # arm held by each agent, -1 = none
    mean = np.zeros((M, K))                 # empirical mean reward
    ucb = np.full((M, K), 100000000.0)      # huge index => unexplored arms first
    wins = np.zeros((M, K), int)            # collision-free pull counts
    agent = list(np.arange(M))
    match = pre.stable_matching(agent, value, arm_preference, M, K)
    regret = np.zeros(T // record_interval + 1)

    # Round 1: resolve the random initial pulls.
    _play_round(pull, result, value, arm_preference, mean, wins, ucb, t0, M)

    for t in range(T - 1):
        # ``result`` does not change while agents choose, so the holders and
        # the free arms are computed once per round.
        holders = [(j, result[j]) for j in range(M) if result[j] != -1]
        taken = {int(arm) for _, arm in holders}
        free_arms = [k for k in range(K) if k not in taken]

        for i in range(M):
            if pre.Bernoulli(lamda) != 0:
                continue  # keep last round's pull
            plausible = _plausible_arms(i, holders, free_arms, arm_preference, M)
            # With no plausible arm the agent keeps its previous pull; writing
            # -1 into ``pull`` would silently alias the last arm and corrupt
            # the collision check.
            if plausible:
                # max() returns the first maximal item, so ties go to the
                # earliest arm in ``plausible``.
                pull[i] = max(plausible, key=lambda arm: ucb[i, arm])

        _play_round(pull, result, value, arm_preference, mean, wins, ucb, t0, M)

        # Round 1 happened before the loop, so iteration ``t`` is round t + 2.
        rounds_played = t + 2
        if rounds_played % record_interval == 0:
            regret[rounds_played // record_interval] = _regret_vs_match(
                rounds_played, wins, value, match, M, K)
    return regret







