import math
import numpy as np
import random
import matplotlib.pyplot as plt
from tqdm import tqdm

def pull_arm(mu, i, a):
    """Sample one Bernoulli reward for agent ``i`` pulling arm ``a``.

    Args:
        mu: Two-level indexable of success probabilities, read as ``mu[i][a]``.
        i: Agent (row) index.
        a: Arm (column) index.

    Returns:
        ``1`` when a uniform draw on [0, 1] falls strictly below ``mu[i][a]``,
        otherwise ``0``.
    """
    # Draw before indexing so the RNG stream is consumed exactly once per call.
    draw = random.uniform(0, 1)
    success_prob = mu[i][a]
    return int(draw < success_prob)
def Confidence(t, n_i_k_t, N_i, beta_i):
    """Return the confidence bonus added to an arm estimate at round ``t``.

    The value is ``(1 + beta_i) * sqrt(3 * ln(t) / (N_i * n_i_k_t)) + 1 / (2 * t)``.

    Args:
        t: Current round; must be positive (``ln(t)`` and ``1 / (2 * t)`` are taken).
        n_i_k_t: Pull count of the arm; must be non-zero.
        N_i: Neighbourhood size used to scale the pull count; must be non-zero.
        beta_i: Extra scaling applied to the square-root term.

    Returns:
        The bonus as a float.
    """
    effective_samples = N_i * n_i_k_t
    exploration = math.sqrt(3 * math.log(t) / effective_samples)
    drift = 1 / (2 * t)
    return (1 + beta_i) * exploration + drift

def get_bits(num):
    """Return ``ceil(1 + log2(num))``, treating any ``num <= 0`` as ``1``.

    Args:
        num: Scalar value whose bit cost is estimated.

    Returns:
        A NumPy float. Non-positive inputs give ``1.0``. Note that inputs
        strictly between 0 and 1 have a negative ``log2`` and can therefore
        yield ``0.0`` or a negative result.
    """
    value = 1 if num <= 0 else num
    return np.ceil(np.log2(value) + 1)

def DUCB(N, M, T, mu, best_arm, W, beta, Neighbor, Neighbor_x):
    """Simulate a decentralized UCB run with neighbour averaging.

    Every agent first pulls every arm ``retrain_time`` (10) times to seed its
    statistics. Then, for each round ``t = 1 .. T-1``:

    1. each agent picks an arm: if some arm's local pull count ``n`` lags the
       neighbourhood maximum ``m`` by at least ``M`` it pulls one of those
       arms at random, otherwise it pulls the arm maximising
       ``z + Confidence(...)``;
    2. it observes a Bernoulli reward via ``pull_arm`` and its cumulative
       regret is extended;
    3. the running estimates ``z`` are mixed across ``Neighbor[i]`` with the
       weights ``W`` and corrected by the change of the agent's own sample
       mean; ``m`` is refreshed from the neighbours.

    Args:
        N: Number of agents.
        M: Number of arms.
        T: Horizon; rounds ``1 .. T-1`` are simulated.
        mu: ``mu[i][k]`` is the success probability of arm ``k`` for agent ``i``.
        best_arm: Arm index used as the regret benchmark.
        W: ``W[i][j]`` is the weight agent ``i`` applies to agent ``j``'s estimate.
        beta: ``beta[i]`` is the confidence scaling of agent ``i``.
        Neighbor: ``Neighbor[i]`` lists the agents whose estimates agent ``i``
            reads every round.
        Neighbor_x: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(regret, comm_times, comm_bits)``:
        ``regret`` has shape ``(N, T)`` and holds each agent's cumulative
        regret measured on the column means of ``mu``; ``comm_times`` and
        ``comm_bits`` have length ``T`` and hold the cumulative number of
        exchanged values and their cumulative ``get_bits`` cost.
    """
    mu_star = np.mean(mu, axis=0)
    # One regret trace per agent (previously hard-coded to eight agents).
    regret_list = [[0] for _ in range(N)]
    comm_times = [0]
    comm_bits = [0]

    # Warm-up: hat_x[i][k] = [sum of rewards, number of pulls].
    retrain_time = 10
    hat_x = np.zeros((N, M, 2))
    for _ in range(retrain_time):
        for i in range(N):
            for k in range(M):
                hat_x[i][k][0] += pull_arm(mu, i, k)
                hat_x[i][k][1] += 1
    z = hat_x[:, :, 0] / hat_x[:, :, 1]

    # m: largest pull count known in the neighbourhood; n: the agent's own
    # pull count. They must be separate arrays: aliasing them makes the
    # catch-up test below unsatisfiable.
    m = np.full((N, M), float(retrain_time))
    n = m.copy()

    for t in range(1, T):
        chosen = [0] * N
        # Work on a copy so the pre-round statistics stay available for the
        # innovation term in the z update (an alias would make it always 0).
        new_hat_x = hat_x.copy()
        comm_times.append(comm_times[-1])
        comm_bits.append(comm_bits[-1])

        for i in range(N):
            # Arms this agent has sampled at least M times less than the
            # neighbourhood maximum must be caught up first.
            lagging = [k for k in range(M) if n[i][k] <= m[i][k] - M]
            if lagging:
                arm = random.choice(lagging)
            else:
                degree = len(Neighbor[i])
                index = [
                    z[i][k] + Confidence(t, n[i][k], degree, beta[i])
                    for k in range(M)
                ]
                arm = int(np.argmax(index))
            chosen[i] = arm

            reward = pull_arm(mu, i, arm)
            new_hat_x[i][arm][0] += reward
            new_hat_x[i][arm][1] += 1
            regret_list[i].append(
                regret_list[i][-1] + mu_star[best_arm] - mu_star[arm]
            )

        new_z = np.zeros((N, M))
        for i in range(N):
            n[i][chosen[i]] += 1
            for k in range(M):
                for nei in Neighbor[i]:
                    new_z[i][k] += W[i][nei] * z[nei][k]
                    # One value is counted per entry of Neighbor[i] and arm.
                    comm_times[-1] += 1
                    comm_bits[-1] += get_bits(z[nei][k]) * 2
                # Innovation: change of the agent's own sample mean this round.
                new_z[i][k] += (
                    new_hat_x[i][k][0] / new_hat_x[i][k][1]
                    - hat_x[i][k][0] / hat_x[i][k][1]
                )
            # m is updated in place, so agents processed later in this loop
            # already see the values refreshed earlier in the same round.
            for k in range(M):
                for j in Neighbor[i]:
                    m[i][k] = max(n[i][k], m[j][k], m[i][k])

        hat_x = new_hat_x
        z = new_z

    # The third element used to be comm_times again; return the bit counts.
    return np.array(regret_list), np.array(comm_times), np.array(comm_bits)

if __name__ == '__main__':
    # Mixing weights handed to DUCB as W: W[i][j] is the weight agent i puts
    # on agent j's estimate. Each row has five entries of 0.2 (the agent
    # itself plus two agents on either side of an 8-agent ring) and zeros
    # elsewhere.
    W = np.array([[0.2, 0.2, 0.2, 0, 0, 0, 0.2, 0.2],
                  [0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0.2],
                  [0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0],
                  [0, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0],
                  [0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0],
                  [0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2],
                  [0.2, 0, 0, 0, 0.2, 0.2, 0.2, 0.2],
                  [0.2, 0.2, 0, 0, 0, 0.2, 0.2, 0.2]])
    # Disabled alternative reward table with 8 rows and 10 columns.
    # mu = np.array([
    #     [0.8, 0.9, 0.95, 0.85, 0.85, 0.8, 0.7, 0.65, 0.75, 0.75],
    #     [0.7, 0.6, 0.2, 0.1, 0.7, 0.2, 0.3, 0.5, 0.7, 0.4],
    #     [0.3, 0.3, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.6, 0.6],
    #     [0.6, 0.5, 0.7, 0.3, 0.85, 0.9, 0.6, 0.6, 0.3, 0.3],
    #     [0.7, 0.6, 0.8, 0.2, 0.85, 0.3, 0.3, 0.5, 0.3, 0.2],
    #     [0.5, 0.7, 0.9, 0.7, 0.9, 0.7, 0.6, 0.5, 0.3, 0.6],
    #     [0.4, 0.5, 0.7, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7, 0.5],
    #     [0.3, 0.4, 0.6, 0.7, 0.75, 0.7, 0.5, 0.4, 0.2, 0.3]
    # ])

    # Reward table used for this run (passed to DUCB as mu): row i holds the
    # Bernoulli success probabilities agent i sees, one column per arm
    # (8 agents x 8 arms).
    mu8 = np.array([
        [0.9, 0.6, 0.8, 0.8, 0.6, 0.8, 0.7, 0.6],
        [0.8, 0.6, 0.7, 0.1, 0.6, 0.2, 0.3, 0.5],
        [0.7, 0.7, 0.6, 0.4, 0.6, 0.4, 0.5, 0.7],
        [0.8, 0.8, 0.6, 0.3, 0.6, 0.6, 0.6, 0.6],
        [0.7, 0.8, 0.8, 0.2, 0.6, 0.3, 0.3, 0.5],
        [0.9, 0.9, 0.6, 0.7, 0.6, 0.6, 0.6, 0.5],
        [0.8, 0.6, 0.9, 0.7, 0.6, 0.6, 0.3, 0.7],
        [0.8, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5, 0.4]
    ])

    # mu9 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.8, 0.8, 0.7, 0.6, 0.6],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6],
    #     [0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.7],
    #     [0.8, 0.8, 0.6, 0.3, 0.8, 0.9, 0.6, 0.6, 0.7],
    #     [0.7, 0.8, 0.8, 0.2, 0.8, 0.3, 0.3, 0.5, 0.7],
    #     [0.9, 0.9, 0.6, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7],
    #     [0.8, 0.6, 0.9, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7],
    #     [0.8, 0.6, 0.6, 0.7, 0.7, 0.7, 0.5, 0.4, 0.6]
    # ])


    # mu10 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.6, 0.8, 0.7, 0.6, 0.6, 0.9],
    #     [0.8, 0.6, 0.7, 0.1, 0.6, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.7, 0.7, 0.6, 0.4, 0.6, 0.4, 0.5, 0.7, 0.7, 0.7],
    #     [0.8, 0.8, 0.6, 0.3, 0.6, 0.6, 0.6, 0.6, 0.7, 0.2],
    #     [0.7, 0.8, 0.8, 0.2, 0.6, 0.3, 0.3, 0.5, 0.7, 0.2],
    #     [0.9, 0.9, 0.6, 0.7, 0.6, 0.6, 0.6, 0.5, 0.7, 0.2],
    #     [0.8, 0.6, 0.9, 0.7, 0.6, 0.6, 0.3, 0.7, 0.7, 0.9],
    #     [0.8, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5, 0.4, 0.6, 0.5]
    # ])

    # mu10 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.9],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.7, 0.7],
    #     [0.8, 0.8, 0.6, 0.3, 0.8, 0.9, 0.6, 0.6, 0.7, 0.2],
    #     [0.7, 0.8, 0.8, 0.2, 0.8, 0.3, 0.3, 0.5, 0.7, 0.2],
    #     [0.9, 0.9, 0.6, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.2],
    #     [0.8, 0.6, 0.9, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7, 0.9],
    #     [0.8, 0.6, 0.6, 0.7, 0.7, 0.7, 0.5, 0.4, 0.6, 0.5]
    # ])

    # mu11 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.6, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1],
    #     [0.7, 0.7, 0.6, 0.4, 0.6, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6],
    #     [0.8, 0.8, 0.6, 0.3, 0.6, 0.6, 0.6, 0.6, 0.7, 0.2, 0.3],
    #     [0.7, 0.8, 0.8, 0.2, 0.6, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3],
    #     [0.9, 0.9, 0.6, 0.7, 0.6, 0.7, 0.6, 0.5, 0.7, 0.2, 0.3],
    #     [0.8, 0.6, 0.9, 0.7, 0.6, 0.8, 0.3, 0.7, 0.7, 0.9, 0.7],
    #     [0.8, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8]
    # ])

    # mu12 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.6, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6],
    #     [0.8, 0.6, 0.7, 0.1, 0.6, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7],
    #     [0.7, 0.7, 0.6, 0.4, 0.6, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4],
    #     [0.8, 0.8, 0.6, 0.3, 0.6, 0.6, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5],
    #     [0.7, 0.8, 0.8, 0.2, 0.6, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5],
    #     [0.9, 0.9, 0.6, 0.7, 0.6, 0.6, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5],
    #     [0.8, 0.6, 0.9, 0.7, 0.6, 0.6, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6],
    #     [0.8, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1]
    # ])

    # mu12 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7],
    #     [0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4],
    #     [0.8, 0.8, 0.6, 0.3, 0.8, 0.9, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5],
    #     [0.7, 0.8, 0.8, 0.2, 0.8, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5],
    #     [0.9, 0.9, 0.6, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5],
    #     [0.8, 0.6, 0.9, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6],
    #     [0.8, 0.6, 0.6, 0.7, 0.7, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1]
    # ])

    # mu13 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6, 0.4],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7, 0.2],
    #     [0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4, 0.8],
    #     [0.8, 0.8, 0.6, 0.3, 0.8, 0.9, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5, 0.6],
    #     [0.7, 0.8, 0.8, 0.2, 0.8, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6],
    #     [0.9, 0.9, 0.6, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6],
    #     [0.8, 0.6, 0.9, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6, 0.5],
    #     [0.8, 0.6, 0.6, 0.7, 0.7, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1, 0.4]
    # ])

    # mu14 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.6, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6, 0.4, 0.5],
    #     [0.8, 0.6, 0.7, 0.1, 0.6, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3],
    #     [0.7, 0.7, 0.6, 0.4, 0.6, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4, 0.8, 0.4],
    #     [0.8, 0.8, 0.6, 0.3, 0.6, 0.6, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.7, 0.8, 0.8, 0.2, 0.6, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.9, 0.9, 0.6, 0.7, 0.6, 0.6, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.8, 0.6, 0.9, 0.7, 0.6, 0.6, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7],
    #     [0.8, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1, 0.4, 0.3]
    # ])

    # mu14 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6, 0.4, 0.5],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3],
    #     [0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4, 0.8, 0.4],
    #     [0.8, 0.8, 0.6, 0.3, 0.8, 0.9, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.7, 0.8, 0.8, 0.2, 0.8, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.9, 0.9, 0.6, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7],
    #     [0.8, 0.6, 0.9, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7],
    #     [0.8, 0.6, 0.6, 0.7, 0.7, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1, 0.4, 0.3]
    # ])

    # mu15 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6, 0.4, 0.5, 0.6],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5],
    #     [0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5],
    #     [0.8, 0.8, 0.6, 0.3, 0.8, 0.9, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1],
    #     [0.7, 0.8, 0.8, 0.2, 0.8, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1],
    #     [0.9, 0.9, 0.6, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.9],
    #     [0.8, 0.6, 0.9, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.9],
    #     [0.8, 0.6, 0.6, 0.7, 0.7, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1, 0.4, 0.3, 0.4]
    # ])

    # mu16 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.6, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6, 0.4, 0.5, 0.6, 0.6],
    #     [0.8, 0.6, 0.7, 0.1, 0.6, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.5],
    #     [0.7, 0.7, 0.6, 0.4, 0.6, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.5],
    #     [0.8, 0.8, 0.6, 0.3, 0.6, 0.6, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.1],
    #     [0.7, 0.8, 0.8, 0.2, 0.6, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.1],
    #     [0.9, 0.9, 0.6, 0.7, 0.6, 0.6, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.9, 0.4],
    #     [0.8, 0.6, 0.9, 0.7, 0.6, 0.6, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.9, 0.4],
    #     [0.8, 0.6, 0.6, 0.7, 0.6, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1, 0.4, 0.3, 0.4, 0.4]
    # ])

    # mu16 = np.array([
    #     [0.9, 0.6, 0.8, 0.8, 0.8, 0.8, 0.7, 0.6, 0.6, 0.9, 0.7, 0.6, 0.4, 0.5, 0.6, 0.6],
    #     [0.8, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.7, 0.2, 0.3, 0.5, 0.5],
    #     [0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.7, 0.7, 0.7, 0.6, 0.4, 0.8, 0.4, 0.5, 0.5],
    #     [0.8, 0.8, 0.6, 0.3, 0.8, 0.9, 0.6, 0.6, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.1],
    #     [0.7, 0.8, 0.8, 0.2, 0.8, 0.3, 0.3, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.1, 0.1],
    #     [0.9, 0.9, 0.6, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.2, 0.3, 0.5, 0.6, 0.7, 0.9, 0.4],
    #     [0.8, 0.6, 0.9, 0.7, 0.7, 0.8, 0.3, 0.7, 0.7, 0.9, 0.7, 0.6, 0.5, 0.7, 0.9, 0.4],
    #     [0.8, 0.6, 0.6, 0.7, 0.7, 0.7, 0.5, 0.4, 0.6, 0.5, 0.8, 0.1, 0.4, 0.3, 0.4, 0.4]
    # ])

    T = int(1e6)  # time
    # Regret benchmark: the arm with the highest mean across agents.
    best_arm = int(np.argmax(np.mean(mu8, axis=0)))

    # Disabled alternatives for the neighbour description (without self-loops):
    #Neighbor = [4,4,4,4,4,4,4,4]
    # Neighbor = np.array([[1, 2, 6, 7],
    #           [0, 2, 3, 7],
    #           [0, 1, 3, 4],
    #           [1, 2, 4, 5],
    #           [2, 3, 5, 6],
    #           [3, 4, 6, 7],
    #           [0, 4, 5, 7],
    #           [0, 1, 5, 6]])

    # Row i lists the agents whose estimates agent i reads each round: its
    # four ring neighbours followed by the agent itself.
    Neighbor = np.array([[1, 2, 6, 7, 0],
                         [0, 2, 3, 7, 1],
                         [0, 1, 3, 4, 2],
                         [1, 2, 4, 5, 3],
                         [2, 3, 5, 6, 4],
                         [3, 4, 6, 7, 5],
                         [0, 4, 5, 7, 6],
                         [0, 1, 5, 6, 7]])

    # 0/1 adjacency form of the same graph (passed to DUCB as Neighbor_x).
    Neighbor_x = np.array([[1, 1, 1, 0, 0, 0, 1, 1],
                  [1, 1, 1, 1, 0, 0, 0, 1],
                  [1, 1, 1, 1, 1, 0, 0, 0],
                  [0, 1, 1, 1, 1, 1, 0, 0],
                  [0, 0, 1, 1, 1, 1, 1, 0],
                  [0, 0, 0, 1, 1, 1, 1, 1],
                  [1, 0, 0, 0, 1, 1, 1, 1],
                  [1, 1, 0, 0, 0, 1, 1, 1]])

    repeated_time = 10

    # Derive the problem size from the reward table instead of repeating it.
    N, M = mu8.shape  # agents, arms
    beta = [0.1] * N
    regret_lists = []
    comm_times_lists = []
    comm_bits_lists = []
    for repeat_time in tqdm(range(repeated_time)):
        regret_list, comm_times, comm_bits = DUCB(N, M, T, mu8, best_arm, W, beta, Neighbor, Neighbor_x)
        regret_lists.append(regret_list)
        comm_times_lists.append(comm_times)
        comm_bits_lists.append(comm_bits)

    regret_lists = np.array(regret_lists)
    comm_times_lists = np.array(comm_times_lists)
    comm_bits_lists = np.array(comm_bits_lists)

    # np.save hands the path to open() unchanged, so '~' must be expanded
    # explicitly; the directory is created up front so a long run is not lost
    # to a missing folder.
    import os
    out_dir = os.path.expanduser('~/var_arm/data/ducb')
    os.makedirs(out_dir, exist_ok=True)
    np.save(os.path.join(out_dir, 'regret_lists_mu8.npy'), regret_lists)
    np.save(os.path.join(out_dir, 'comm_times_list_mu8.npy'), comm_times_lists)
    np.save(os.path.join(out_dir, 'comm_bits_list_mu8.npy'), comm_bits_lists)
    
    

    # regret_list_12 = []
    # for repeat_time in range(repeated_time):
    #     temp_regret_list = DUCB(N, 12, T, mu12, best_arm, W, beta, Neighbor, Neighbor_x)
    #     regret_list_12.append(temp_regret_list[0][-1])

    # regret_list_14 = []
    # for repeat_time in range(repeated_time):
    #     temp_regret_list = DUCB(N, 14, T, mu14, best_arm, W, beta, Neighbor, Neighbor_x)
    #     regret_list_14.append(temp_regret_list[0][-1])

    # regret_list_16 = []
    # for repeat_time in range(repeated_time):
    #     temp_regret_list = DUCB(N, 16, T, mu16, best_arm, W, beta, Neighbor, Neighbor_x)
    #     regret_list_16.append(temp_regret_list[0][-1])

    # regret_list_zero_np_8 = np.array(regret_list_8)
    # regret_mean_8 = np.mean(regret_list_zero_np_8)
    # regret_std_8 = np.std(regret_list_zero_np_8)

    

    # regret_list_zero_np_12 = np.array(regret_list_12)
    # regret_mean_12 = np.mean(regret_list_zero_np_12)
    # regret_std_12 = np.std(regret_list_zero_np_12)

    # regret_list_zero_np_14 = np.array(regret_list_14)
    # regret_mean_14 = np.mean(regret_list_zero_np_14)
    # regret_std_14 = np.std(regret_list_zero_np_14)

    # regret_list_zero_np_16 = np.array(regret_list_16)
    # regret_mean_16 = np.mean(regret_list_zero_np_16)
    # regret_std_16 = np.std(regret_list_zero_np_16)

    # regret_DIST = [regret_mean_8, regret_mean_10, regret_mean_12, regret_mean_14, regret_mean_16]
    #plt.plot([8,10,12,14,16], regret_DIST, linestyle='--', color='pink', marker='v', markerfacecolor='none', markersize=10)

    #plt.fill_between([8,10,12,14,16], [regret_mean_8-regret_std_8,regret_mean_10-regret_std_10,regret_mean_12-regret_std_12,regret_mean_14-regret_std_14,regret_mean_16-regret_std_16], [regret_mean_8+regret_std_8,regret_mean_10+regret_std_10,regret_mean_12+regret_std_12,regret_mean_14+regret_std_14,regret_mean_16+regret_std_16],color='LightPink', alpha=0.2)

    #plt.title('regret')
    #plt.xlabel('time')
    #plt.ylabel('value')
    #plt.legend()
    #plt.show()

    # return (regret_DIST,[regret_mean_8-regret_std_8,regret_mean_10-regret_std_10,regret_mean_12-regret_std_12,regret_mean_14-regret_std_14,regret_mean_16-regret_std_16], [regret_mean_8+regret_std_8,regret_mean_10+regret_std_10,regret_mean_12+regret_std_12,regret_mean_14+regret_std_14,regret_mean_16+regret_std_16])

    # return regret_list_10