import numpy as np
from tqdm import trange
import time, json, itertools, os
from itertools import product
from data import generate_diverse_theta, fixed_regular_cxts, uniform_cxts, pareto_front
from train import P_UCB, MOGLM_UCB, MORR_Greedy
import matplotlib.pyplot as plt
import seaborn as sns
import pickle 
%load_ext autoreload
%autoreload 2

    
def eval_P_UCB(K, m, mu_list=None, T=30000, Rep=10, sig=0.1, seed=0, output=False):
    """Run P_UCB on a K-armed, m-objective bandit and record its Pareto regret.

    Args:
        K: Number of arms.
        m: Number of objectives.
        mu_list: Expected rewards indexed as ``[arm, objective]``. When None,
            a (K, m) matrix is drawn uniformly from [-1, 1).
        T: Number of rounds per repetition.
        Rep: Number of independent repetitions.
        sig: Standard deviation of the Gaussian noise added to the rewards.
        seed: Accepted for signature parity with the other evaluators; this
            function does not currently use it.
        output: If True, return the result dict; otherwise write it as JSON.

    Returns:
        When ``output`` is True, a dict with keys 'model', 'regrets',
        'selected_rounds' and 'time' (nested lists, one row per repetition).
        Otherwise None; a one-element list holding that dict is written to
        ``./results/P_UCB_K<K>_m<m>.txt``.
    """
    if mu_list is None:
        exp_rewards = np.random.uniform(-1, 1, (K, m))
    else:
        # 2-D indexing is used below, so accept nested lists as well as arrays.
        exp_rewards = np.asarray(mu_list)

    # Precalculation: the front and each arm's gap are fixed for the whole run.
    true_pareto = pareto_front(exp_rewards)
    print("pareto front: ", true_pareto)
    subopt_gap = [
        max(min(exp_rewards[i, :] - exp_rewards[j, :]) for i in true_pareto)
        for j in range(K)
    ]

    cumul_regret = np.zeros((Rep, T))
    elapsed_time = np.zeros((Rep, T))
    selected_rounds = np.zeros((Rep, K))

    for r in range(Rep):
        print('P-UCB Simulation %d, K=%d, m=%d' % (r+1, K, m))
        moP_UCB = P_UCB(K=K, m=m, TPF_size=len(true_pareto))
        regret = np.zeros(T)

        for t in trange(T):
            # Only the learner's select/update work is timed.
            start = time.time()
            a_t = moP_UCB.select_ac()
            reward = exp_rewards[a_t, :] + np.random.normal(0, sig, size=m)
            moP_UCB.update(a_t, reward)
            elapsed_time[r, t] = time.time() - start

            # Instantaneous regret is the suboptimality gap of the chosen arm.
            regret[t] = subopt_gap[a_t]
            selected_rounds[r, a_t] += 1

        cumul_regret[r, :] = np.cumsum(regret)
        print("Total Regret:", cumul_regret[r, -1])

    result = {'model': 'P_UCB',
              'regrets': cumul_regret.tolist(),
              'selected_rounds': selected_rounds.tolist(),
              'time': elapsed_time.tolist()}
    if output:
        return result

    # The file name has exactly two placeholders (K, m); the previous code
    # passed a third, undefined value `d`, which made this branch always raise.
    os.makedirs('./results', exist_ok=True)
    with open('./results/P_UCB_K%d_m%d.txt' % (K, m), 'w') as outfile:
        json.dump([result], outfile)

def eval_MOGLM_UCB(K, d, m, fixed=True, Tlist=None, cxts=None, c_set=(1, 0.1), T=30000, Rep=10, sig=0.1, seed=0, output=False):
    """Evaluate MOGLM_UCB for every value in ``c_set`` and record its regret.

    Args:
        K: Number of arms.
        d: Context dimension.
        m: Number of objectives.
        fixed: If True, one context set is used for all rounds; otherwise a
            new set is drawn with ``uniform_cxts`` in every round.
        Tlist: True parameter matrix; ``contexts @ Tlist.T`` gives the expected
            rewards. Generated with ``generate_diverse_theta`` when None.
        cxts: Fixed contexts to use when ``fixed`` is True; generated with
            ``fixed_regular_cxts`` when None. Ignored when ``fixed`` is False.
        c_set: Iterable of values passed to ``MOGLM_UCB`` as ``c``.
        T: Number of rounds per repetition.
        Rep: Number of independent repetitions.
        sig: Standard deviation of the Gaussian noise added to the rewards.
        seed: Accepted for signature parity; not used by this function.
        output: If True, return the results; otherwise write them as JSON.

    Returns:
        When ``output`` is True, a list with one dict per entry of ``c_set``
        (keys 'model', 'context_type', 'settings', 'regrets', 'theta_err',
        'selected_rounds', 'time'). Otherwise None; the list is written to
        ``./results/MOGLM_UCB_d<d>_K<K>_m<m>.txt``.
    """
    if Tlist is None:
        thetalist = generate_diverse_theta(d=d, m=m)
        print("thetalist:\n", thetalist)
    else:
        thetalist = Tlist

    if fixed:
        contexts = fixed_regular_cxts(K=K, d=d, Tlist=thetalist) if cxts is None else cxts

        # Precalculation: with fixed contexts the front and gaps never change.
        exp_rewards = np.matmul(contexts, thetalist.T)
        true_pareto = pareto_front(exp_rewards)
        print("pareto front: ", true_pareto)
        subopt_gap = [
            max(min(exp_rewards[i, :] - exp_rewards[j, :]) for i in true_pareto)
            for j in range(K)
        ]

    context_type = 'fixed' if fixed else 'stochastic'
    results = []

    for c in c_set:
        cumul_regret = np.zeros((Rep, T))
        theta_err = np.zeros((Rep, T, m))
        elapsed_time = np.zeros((Rep, T))
        selected_rounds = np.zeros((Rep, K))

        for r in range(Rep):
            print('MOGLM-UCB Simulation %d, K=%d, d=%d, m=%d, c=%.3f' % (r+1, K, d, m, c))
            moGLM_UCB = MOGLM_UCB(d=d, m=m, c=c)
            regret = np.zeros(T)

            for t in trange(T):
                if not fixed:
                    # Fresh contexts each round, drawn outside the timed section.
                    # NOTE(review): 10*t+r repeats across repetitions once
                    # Rep > 10 -- confirm that is acceptable.
                    contexts = uniform_cxts(K=K, d=d, seed=10*t+r)
                    exp_rewards = np.matmul(contexts, thetalist.T)
                    true_pareto = pareto_front(exp_rewards)

                # Only the learner's select/update work is timed.
                start = time.time()
                a_t = moGLM_UCB.select_ac(contexts)
                reward = exp_rewards[a_t, :] + np.random.normal(0, sig, size=m)
                moGLM_UCB.update(reward)
                elapsed_time[r, t] = time.time() - start

                # Instantaneous regret is the suboptimality gap of the chosen arm.
                if fixed:
                    regret[t] = subopt_gap[a_t]
                else:
                    regret[t] = max(min(exp_rewards[i, :] - exp_rewards[a_t, :]) for i in true_pareto)

                # Per-objective estimation error of the current theta estimate.
                theta_err[r, t] = np.linalg.norm(moGLM_UCB.theta_hat - thetalist, axis=1)
                selected_rounds[r, a_t] += 1

            cumul_regret[r, :] = np.cumsum(regret)

            print("Total Regret:", cumul_regret[r, -1])
            print("Thera Error:", theta_err[r, -1])

        # 'settings' is taken from the model of the last repetition.
        results.append({'model': 'MOGLM_UCB',
                        'context_type': context_type,
                        'settings': moGLM_UCB.settings,
                        'regrets': cumul_regret.tolist(),
                        'theta_err': theta_err.tolist(),
                        'selected_rounds': selected_rounds.tolist(),
                        'time': elapsed_time.tolist()})

    if output:
        return results

    os.makedirs('./results', exist_ok=True)
    with open('./results/MOGLM_UCB_d%d_K%d_m%d.txt' % (d, K, m), 'w') as outfile:
        json.dump(results, outfile)


def eval_MORR_Greedy(K, d, m, fixed=True, Tlist=None, cxts=None, lam_set=None, T=30000, Rep=10, sig=0.1, seed=0, output=False):
    """Evaluate MORR_Greedy for every value in ``lam_set`` and record its regret.

    Args:
        K: Number of arms.
        d: Context dimension.
        m: Number of objectives.
        fixed: If True, one context set is used for all rounds; otherwise a
            new set is drawn with ``uniform_cxts`` in every round.
        Tlist: True parameter matrix; ``contexts @ Tlist.T`` gives the expected
            rewards. Generated with ``generate_diverse_theta`` when None.
        cxts: Fixed contexts to use when ``fixed`` is True; generated with
            ``fixed_regular_cxts`` when None. Ignored when ``fixed`` is False.
        lam_set: Iterable of settings. A number is passed to ``MORR_Greedy``
            as ``lam``. A string (e.g. 'theoretical') selects the variant
            built from ``T`` and ``lam_0``, which is only wired up for fixed
            contexts. None means ``['theoretical']`` with fixed contexts.
        T: Number of rounds per repetition.
        Rep: Number of independent repetitions.
        sig: Standard deviation of the Gaussian noise added to the rewards.
        seed: Accepted for signature parity; not used by this function.
        output: If True, return the results; otherwise write them as JSON.

    Returns:
        When ``output`` is True, a list with one dict per entry of ``lam_set``
        (keys 'model', 'context_type', 'settings', 'regrets', 'theta_err',
        'selected_rounds', 'time'). Otherwise None; the list is written to
        ``./results/MORR_Greedy_d<d>_K<K>_m<m>.txt``.

    Raises:
        ValueError: If ``fixed`` is False and ``lam_set`` is None or contains
            a string entry. Previously these cases failed later with an
            unrelated TypeError.
    """
    # Validate up front so an unsupported request fails before any simulation.
    if lam_set is None:
        if not fixed:
            raise ValueError("lam_set must be given explicitly when fixed=False")
        lam_set = ['theoretical']
    else:
        lam_set = list(lam_set)
        if not fixed and any(isinstance(lam, str) for lam in lam_set):
            raise ValueError("string entries in lam_set are only supported when fixed=True")

    if Tlist is None:
        thetalist = generate_diverse_theta(d=d, m=m)
        print("thetalist:\n", thetalist)
    else:
        thetalist = Tlist

    if fixed:
        contexts = fixed_regular_cxts(K=K, d=d, Tlist=thetalist) if cxts is None else cxts

        # Precalculation: with fixed contexts the front and gaps never change.
        exp_rewards = np.matmul(contexts, thetalist.T)
        true_pareto = pareto_front(exp_rewards)
        print("pareto front: ", true_pareto)
        subopt_gap = [
            max(min(exp_rewards[i, :] - exp_rewards[j, :]) for i in true_pareto)
            for j in range(K)
        ]

    context_type = 'fixed' if fixed else 'stochastic'
    results = []

    for lam in lam_set:
        # isinstance (not type(...) ==) also keeps working if a notebook cell
        # has rebound the global name `type`.
        theoretical = isinstance(lam, str)
        if theoretical:
            # Loop-invariant, so computed once per setting. thetalist.T @ thetalist
            # is symmetric, hence eigvalsh (real eigenvalues).
            lam_0 = np.min(np.linalg.eigvalsh(thetalist.T @ thetalist)) / m

        cumul_regret = np.zeros((Rep, T))
        theta_err = np.zeros((Rep, T, m))
        elapsed_time = np.zeros((Rep, T))
        selected_rounds = np.zeros((Rep, K))

        for r in range(Rep):
            if theoretical:
                print('MORR-Greedy Simulation %d, K=%d, d=%d, m=%d, lam=%s' % (r+1, K, d, m, lam))
                moRRGreedy = MORR_Greedy(d=d, m=m, T=T, lam_0=lam_0, contexts=contexts)
            else:
                print('MORR-Greedy Simulation %d, K=%d, d=%d, m=%d, lam=%.3f' % (r+1, K, d, m, lam))
                if fixed:
                    moRRGreedy = MORR_Greedy(d=d, m=m, lam=lam, contexts=contexts)
                else:
                    moRRGreedy = MORR_Greedy(d=d, m=m, lam=lam)

            regret = np.zeros(T)

            for t in trange(T):
                if not fixed:
                    # Fresh contexts each round, drawn outside the timed section.
                    contexts = uniform_cxts(K=K, d=d, seed=10*t+r)
                    exp_rewards = np.matmul(contexts, thetalist.T)
                    true_pareto = pareto_front(exp_rewards)

                # Only the learner's select/update work is timed; the second
                # argument cycles through the objectives in round-robin order.
                start = time.time()
                a_t = moRRGreedy.select_ac(contexts, t % m)
                reward = exp_rewards[a_t, :] + np.random.normal(0, sig, size=m)
                moRRGreedy.update(reward)
                elapsed_time[r, t] = time.time() - start

                # Instantaneous regret is the suboptimality gap of the chosen arm.
                if fixed:
                    regret[t] = subopt_gap[a_t]
                else:
                    regret[t] = max(min(exp_rewards[i, :] - exp_rewards[a_t, :]) for i in true_pareto)

                # Per-objective estimation error of the current theta estimate.
                theta_err[r, t] = np.linalg.norm(moRRGreedy.theta_hat - thetalist, axis=1)
                selected_rounds[r, a_t] += 1

            cumul_regret[r, :] = np.cumsum(regret)

            print("Total Regret:", cumul_regret[r, -1])
            print("Thera Error:", theta_err[r, -1])

        # 'settings' is taken from the model of the last repetition.
        results.append({'model': 'MORR-Greedy',
                        'context_type': context_type,
                        'settings': moRRGreedy.settings,
                        'regrets': cumul_regret.tolist(),
                        'theta_err': theta_err.tolist(),
                        'selected_rounds': selected_rounds.tolist(),
                        'time': elapsed_time.tolist()})

    if output:
        return results

    os.makedirs('./results', exist_ok=True)
    with open('./results/MORR_Greedy_d%d_K%d_m%d.txt' % (d, K, m), 'w') as outfile:
        json.dump(results, outfile)


# Experiment: stochastic-context tuning runs.
# Each (exp_start, d, K, m) setting is run on InsNum independently generated
# theta instances; instance `num` is saved as Experiment<exp_start+num>.pkl.
exp_type = [201, 211, 221, 231, 241, 251]
d_set = [5, 5, 5, 5, 10, 10]
K_set = [50, 100, 50, 100, 100, 200]
m_set = [5, 5, 10, 10, 10, 10]
T = 500
Rep = 10
InsNum = 10

# P_UCB is not run here, so its entry in the saved dict stays an empty list.
result_P_UCB = []
results_MOGLM_UCB = []
results_MORR_Greedy = []

# Make sure the output directory exists before the first (long) simulation.
os.makedirs('./results', exist_ok=True)

for exp_start, d, K, m in zip(exp_type, d_set, K_set, m_set):
    for num in range(InsNum):
        exp_index = exp_start + num
        print("****************************************************")
        print("Experiment", exp_index)
        thetalist = generate_diverse_theta(d=d, m=m, lambda0xm=0.08)
        print("thetalist:\n", thetalist)

        info = [d, K, m, T, Rep]
        print("d=%d, K=%d, m=%d, T=%d, Rep=%d" % (info[0], info[1], info[2], info[3], info[4]))

        c_set = [1, 0.1]        #width=c*dlog(det(Vt)/det(V1)) for c = 1, 0.1
        results_MOGLM_UCB = eval_MOGLM_UCB(K=K, d=d, m=m, fixed=False, Tlist=thetalist, c_set=c_set, T=T, Rep=Rep, output=True)

        lam_set = [1, 0.1, 0.01]
        results_MORR_Greedy = eval_MORR_Greedy(K=K, d=d, m=m, fixed=False, Tlist=thetalist, lam_set=lam_set, T=T, Rep=Rep, output=True)

        # Contexts are redrawn every round inside the evaluators, so there is
        # no single context matrix to store. The previous code read an
        # undefined module-level `contexts` here and raised NameError after
        # both simulations had already run.
        final_results = {'context_type': 'stochastic',
                         'par_setting': 'tunning',
                         'information': info,
                         'thetalist': thetalist.tolist(),
                         'context': None,
                         'P_UCB': result_P_UCB,
                         'MOGLM_UCB': results_MOGLM_UCB,
                         'MORR_Greedy': results_MORR_Greedy}

        with open('./results/Experiment%d.pkl' % (exp_index), 'wb') as f:
            pickle.dump(final_results, f)


# Gathering repetition results: stack the regrets of ten consecutive
# experiment files into one aggregate saved as Experiment<exp_start+200>.pkl.
exp_start = 151
result = []
num_c_set = 2
num_lam_set = 3


regret_P_ucb = []
set_glmucb = dict()
set_greedy = dict()
# One list of per-experiment regret matrices for each tuning value.
regret_glmucb = {i: [] for i in range(num_c_set)}
regret_greedy = {i: [] for i in range(num_lam_set)}
thetalists = []

for num in range(exp_start, exp_start+10):
    # NOTE: pickle.load executes arbitrary code; only load result files
    # produced by this project.
    with open('./results/Experiment%d.pkl' % (num), 'rb') as f:
        exp = pickle.load(f)
    d = exp['information'][0]
    K = exp['information'][1]
    m = exp['information'][2]
    # T and Rep are read from the end of the list.
    T = exp['information'][-2]
    Rep = exp['information'][-1]
    # Named context_type rather than `type`: rebinding the builtin at module
    # level breaks every later type(...) call in the same session.
    context_type = exp['context_type']
    thetalists.append(exp['thetalist'])

    if context_type == 'fixed':
        regret_P_ucb.append(exp['P_UCB']['regrets'])

    for i in range(num_c_set):
        set_glmucb[i] = exp['MOGLM_UCB'][i]['settings']
        regret_glmucb[i].append(exp['MOGLM_UCB'][i]['regrets'])

    for i in range(num_lam_set):
        set_greedy[i] = exp['MORR_Greedy'][i]['settings']
        regret_greedy[i].append(exp['MORR_Greedy'][i]['regrets'])


# d, K, m, T, Rep and context_type hold the values of the last file read.
info = [d, K, m, T, Rep]

tot_result_P_UCB = []
if context_type == 'fixed':
    regret_P_ucb = np.vstack(regret_P_ucb)
    tot_result_P_UCB = {'model': 'P_UCB',
                        'regrets': regret_P_ucb.tolist()}

tot_results_MOGLM_UCB = []
for i in range(num_c_set):
    regret_glmucb[i] = np.vstack(regret_glmucb[i])
    tot_results_MOGLM_UCB.append({'model': 'MOGLM_UCB',
                                  'context_type': context_type,
                                  'thetalists': thetalists,
                                  'settings': set_glmucb[i],
                                  'regrets': regret_glmucb[i].tolist()})

tot_results_MORR_Greedy = []
for i in range(num_lam_set):
    regret_greedy[i] = np.vstack(regret_greedy[i])
    # Label matches the one eval_MORR_Greedy writes; this was previously a
    # copy-pasted 'MOGLM_UCB'.
    tot_results_MORR_Greedy.append({'model': 'MORR-Greedy',
                                    'context_type': context_type,
                                    'thetalists': thetalists,
                                    'settings': set_greedy[i],
                                    'regrets': regret_greedy[i].tolist()})


final_results = {'context_type': context_type,
                 'par_setting': 'tunning',
                 'information': info,
                 'thetalist': thetalists,
                 'P_UCB': tot_result_P_UCB,
                 'MOGLM_UCB': tot_results_MOGLM_UCB,
                 'MORR_Greedy': tot_results_MORR_Greedy}

with open('./results/Experiment%d.pkl' % (exp_start+200), 'wb') as f:
    pickle.dump(final_results, f)
    

#plot
def _plot_regret_curve(regrets, T, color, label, capsize):
    """Plot the mean of `regrets` over axis 0 against rounds 1..T, with std error bars."""
    mean_regret = np.mean(regrets, axis=0)
    std_regret = np.std(regrets, axis=0)
    rounds = range(1, T+1)
    plt.plot(rounds, mean_regret, color=color, label=label)
    plt.errorbar(rounds, mean_regret, yerr=std_regret, errorevery=T // 5, capsize=capsize, color=color, alpha=1)


# One figure per experiment file; exp_index_set is expected to be defined
# before this section runs.
for num in exp_index_set:
    # NOTE: pickle.load executes arbitrary code; only load result files
    # produced by this project.
    with open('./results/Experiment%d.pkl' % (num), 'rb') as f:
        exp = pickle.load(f)
    print("****************************************************")
    print('Experiment%d' % (num))
    info = exp['information']
    # NOTE(review): for the [d, K, m, T, Rep] layout written by the experiment
    # loop in this file, info[3] is T, not a Pareto-front size -- confirm
    # whether this label should read "T".
    print("d=%d, K=%d, m=%d, TPF_size=%d" % (info[0], info[1], info[2], info[3]))
    T = info[-2]

    print("Experiment with tunning values")

    # Create the figure only after the file loaded, so a missing or corrupt
    # file does not leave an empty figure open.
    plt.figure(figsize=(8, 5))

    # P-UCB results are only plotted for fixed-context experiments.
    if exp['context_type'] == 'fixed':
        _plot_regret_curve(exp['P_UCB']['regrets'], T, '#ff7f0e', 'P-UCB', 5)

    # Last entry of each list, i.e. the last tuning value that was run.
    result = exp['MOGLM_UCB'][-1]
    print(result['settings']['c'])
    _plot_regret_curve(result['regrets'], T, '#2ca02c', 'MOGLM-UCB', 3)

    result = exp['MORR_Greedy'][-1]
    _plot_regret_curve(result['regrets'], T, '#9467bd', 'MORR-Greedy', 5)

    plt.legend(loc='upper right', fontsize=18, borderpad=1, labelspacing=1)
    #plt.title("d=%d, K=%d, m=%d" % (info[0], info[1], info[2]), fontsize=20)
    plt.xlabel('Rounds', fontsize=18)
    plt.ylabel('Cumulative Pareto Regret', fontsize=18)
    plt.xticks([100, 200, 300, 400, 500], fontsize=16)
    plt.xlim((0, 500))
    # Both branches of the former `if num<341` set the same ticks and limits.
    plt.yticks([10, 20, 30, 40, 50], fontsize=16)
    plt.ylim((0, 50))
    plt.grid(alpha=0.3)

    # NOTE(review): the file name says "fixed" regardless of
    # exp['context_type'] -- confirm this is intended.
    savename = 'fixed_best_d{}_K{}_M{}'.format(info[0], info[1], info[2])
    plt.savefig(savename + '.pdf')

    plt.show()
    plt.close()
