from tokenize import Decnumber
import numpy as np
from numpy import *
from tqdm import tqdm
import matplotlib
import datetime
import torch
import os
from alg import Decentralized
from AP import Indifference
from  plt import run_plot_unstable
from  plt import run_plot_global_all
from  plt import run_plot_varyDelta_regret
from plt import run_plot_varyN_stable
from  plt import run_plot_varyDelta_stable
from plt import run_plot_global_max
from plt import run_plot_my
from plt import run_plot_my2
from plt import run_plot_varyN_regret




# Two parameters: beta, and N (the market size).
# beta = -2: N=5, [0.1, 0.9] for global preferences; N=3 for the counterexample in the appendix
# beta = -1: random preferences in [0.1, 0.9]; vary size N = 5, 10, 15, 20
# beta = 0, 10, 50, 100: size N=5
# beta = 0.1/0.2/0.3/0.4: vary Delta = 0.05/0.1/0.15/0.2, N=5
# beta = -3: fixed Delta=0.05; vary size N = 5, 10, 20, 40




# --- Experiment configuration -------------------------------------------
# N is the market size; the same value is used for the number of players
# and the number of arms.
N = 5
num_players = num_arms = N

# beta value for this run (the header comment above lists the settings
# each beta value corresponds to).
beta = -2

# Scales the player_mean matrix defined below and is passed to every
# run_* call on the experiment object.
delta = 0.25

# Passed to Indifference as horizon= / trial= and to the plotting helpers.
horizon = 100_000
trials = 20

# arm_preferences = np.array([[0.6, 0.3, 0.3, 0.7, 1.0, 0.2, 1.0, 1.3, 1.0, 0.9, 0.2, 0.3, ], [1.3, 1.0, 1.2, 1.0, 0.8, 0.2, 0.9, 0.8, 0.8, 1.1, 0.4, 0.6, ], [0.4, 1.3, 0.9, 1.1, 1.0, 0.1, 0.2, 0.7, 0.5, 0.1, 0.7, 0.1, ], [0.5, 0.5, 0.9, 0.7, 1.0, 1.1, 1.0, 1.4, 1.1, 0.6, 0.1, 0.8, ], [0.5, 0.5, 0.8, 1.4, 1.1, 0.2, 1.2, 0.1, 1.2, 0.1, 0.7, 0.7, ], [1.4, 1.3, 1.2, 1.2, 1.3, 0.9, 0.9, 0.9, 0.5, 1.3, 0.4, 1.2, ], [1.0, 1.3, 1.0, 0.4, 1.3, 0.6, 1.0, 0.2, 1.1, 1.2, 0.2, 1.0, ], [1.0, 0.4, 1.0, 0.9, 0.2, 0.8, 1.2, 0.8, 1.3, 0.6, 1.3, 0.2, ], [1.4, 1.0, 0.9, 0.2, 1.3, 0.4, 1.1, 1.2, 0.1, 1.0, 0.8, 0.6, ], [0.9, 0.4, 1.1, 0.6, 0.3, 0.2, 0.3, 1.3, 0.1, 0.4, 0.6, 1.2, ], [1.3, 0.5, 1.2, 1.4, 1.0, 0.6, 0.4, 0.4, 0.8, 0.1, 0.8, 1.0, ], [0.1, 1.3, 0.4, 1.0, 1.3, 0.9, 0.8, 1.0, 1.2, 0.8, 0.2, 1.1, ],])
# player_mean = np.array([[0.1, 0.3, 0.9, 1.1, 0.4, 1.0, 0.9, 0.3, 1.2, 1.0, 1.4, 0.7, ], [0.9, 0.9, 0.9, 0.1, 1.0, 0.4, 0.2, 0.6, 1.4, 1.2, 0.6, 1.3, ], [1.3, 1.3, 0.1, 1.0, 0.5, 1.0, 1.1, 0.3, 1.4, 0.8, 1.0, 1.0, ], [0.4, 1.1, 0.1, 0.1, 0.7, 0.8, 1.4, 1.4, 0.5, 1.0, 1.0, 1.1, ], [0.9, 0.5, 0.1, 1.3, 1.2, 0.6, 1.2, 0.6, 0.5, 0.3, 1.1, 1.0, ], [0.5, 0.6, 0.6, 0.8, 0.6, 0.6, 0.9, 1.2, 0.9, 1.2, 0.2, 0.4, ], [1.0, 1.4, 1.4, 1.1, 0.7, 0.1, 0.8, 1.4, 0.7, 1.3, 1.4, 0.4, ], [0.6, 0.4, 0.5, 0.7, 1.4, 1.1, 0.3, 1.4, 1.3, 0.3, 0.6, 1.1, ], [1.2, 0.2, 1.1, 1.1, 0.9, 0.7, 0.4, 0.5, 0.6, 1.1, 0.2, 1.1, ], [0.2, 0.6, 1.2, 0.8, 1.3, 1.1, 1.3, 1.0, 0.7, 0.4, 0.6, 0.6, ], [1.2, 1.0, 1.4, 1.1, 0.7, 0.7, 1.3, 0.1, 0.5, 0.4, 0.6, 0.6, ], [0.8, 1.0, 1.0, 0.4, 0.4, 1.4, 0.9, 0.8, 0.9, 0.8, 1.3, 1.0, ],])

# arm_preferences = np.array([[0.4, 1.1, 0.2, 0.6, 0.2, 0.1, 0.3, 0.7, 0.9, ], [1.0, 0.5, 1.0, 1.1, 0.2, 0.5, 0.4, 1.1, 0.7, ], [0.8, 0.4, 0.7, 1.0, 0.6, 0.9, 0.5, 0.4, 0.5, ], [0.1, 0.4, 1.0, 1.0, 1.1, 0.8, 0.6, 0.6, 1.0, ], [0.5, 0.9, 0.9, 1.0, 0.9, 0.4, 1.1, 0.9, 1.0, ], [0.1, 0.1, 0.3, 0.6, 0.1, 0.3, 0.5, 0.1, 0.1, ], [0.9, 0.2, 0.5, 0.9, 0.2, 0.6, 0.9, 0.8, 0.2, ], [0.2, 0.7, 0.3, 0.2, 0.3, 0.1, 0.9, 0.7, 0.7, ], [1.0, 0.6, 1.0, 1.0, 0.7, 0.6, 0.1, 0.4, 0.6, ],])
# player_mean = np.array([[0.9, 0.3, 0.7, 1.0, 0.6, 0.6, 0.5, 0.5, 0.8, ], [0.8, 0.6, 0.1, 0.4, 0.1, 0.6, 0.4, 0.1, 1.1, ], [0.8, 0.3, 0.7, 0.1, 0.8, 0.3, 0.3, 0.5, 0.3, ], [1.1, 0.3, 0.4, 0.4, 0.9, 0.8, 1.1, 0.2, 0.9, ], [0.7, 1.0, 0.4, 0.4, 1.1, 0.1, 0.8, 0.7, 1.0, ], [0.8, 0.2, 1.1, 0.3, 0.6, 0.5, 0.7, 0.5, 0.8, ], [0.8, 1.1, 0.3, 0.5, 0.8, 0.1, 0.4, 0.2, 0.2, ], [0.5, 0.9, 1.0, 0.8, 1.1, 0.9, 0.3, 0.9, 0.5, ], [0.7, 0.5, 0.2, 0.6, 0.4, 0.4, 0.4, 1.1, 0.6, ],])

# arm_preferences = np.array([[0.1, 0.6, 0.8, 0.2, 0.2, 0.8, ], [0.3, 0.2, 0.7, 0.3, 0.3, 0.4, ], [0.8, 0.1, 0.7, 0.1, 0.8, 0.2, ], [0.5, 0.6, 0.2, 0.3, 0.7, 0.4, ], [0.7, 0.4, 0.4, 0.7, 0.3, 0.5, ], [0.3, 0.8, 0.1, 0.2, 0.4, 0.1, ]])
# player_mean = np.array([[0.2, 0.8, 0.2, 0.7, 0.3, 0.6, ], [0.6, 0.7, 0.2, 0.1, 0.4, 0.5, ], [0.3, 0.2, 0.3, 0.1, 0.1, 0.6, ], [0.6, 0.1, 0.5, 0.5, 0.3, 0.2, ], [0.8, 0.4, 0.8, 0.8, 0.7, 0.2, ], [0.3, 0.7, 0.6, 0.6, 0.6, 0.3, ],])

# arm_preferences = np.array([[2, 3, 3, ], [3, 3, 3, ], [2, 3, 2, ],])
# player_mean = np.array([[1, 3, 3, ], [3, 3, 2, ], [1, 3, 2, ],]) * delta



# Hard-coded 5 x 5 matrices for the N = 5 market, written one row per line.
# NOTE(review): which axis indexes arms and which indexes players is decided
# by Indifference — confirm there before editing these tables.
arm_preferences = np.array(
    [
        [2, 6, 3, 6, 3],
        [1, 2, 4, 2, 5],
        [6, 3, 4, 1, 6],
        [5, 1, 3, 1, 2],
        [6, 2, 2, 6, 3],
    ]
)

# Float levels multiplied element-wise by delta.
player_mean = np.array(
    [
        [2.0, 4.0, 5.0, 6.0, 5.0],
        [4.0, 6.0, 4.0, 3.0, 5.0],
        [1.0, 4.0, 3.0, 4.0, 3.0],
        [3.0, 2.0, 1.0, 1.0, 3.0],
        [2.0, 6.0, 4.0, 3.0, 3.0],
    ]
) * delta

# Build the experiment object from the configuration and preference matrices
# above, then invoke run_AP, run_ETC and run_phasedETC on it, in that order.
test = Indifference(
    horizon=horizon,
    trial=trials,
    num_player=num_players,
    num_arm=num_arms,
    arm_preferences=arm_preferences,
    player_mean=player_mean,
)

test.run_AP(delta)

# h=3000 is specific to run_ETC; beta and delta are shared with run_phasedETC.
test.run_ETC(h=3000, Beta=beta, delta=delta)

test.run_phasedETC(Beta=beta, delta=delta)

# run_plot_my(['Armpropose', 'ETC', 'PhasedETC'], ['AE-AGS', 'C-ETC', 'P-ETC'], [0.25, 0.2, 0.15, 0.1], [3, 6, 9, 12], trials1 = 20 , trials2 = 10, horizon = horizon)
# run_plot_my2(['Armpropose', 'ETC', 'PhasedETC'], ['AE-AGS', 'C-ETC', 'P-ETC'], [0.25, 0.2, 0.15, 0.1], [3, 6, 9], trials1 = 20 , trials2 = 20, horizon = horizon)

# run_plot_global_max(['Armpropose', 'ETC', 'PhasedETC'], ['AE-AGS', 'C-ETC', 'P-ETC'], 'none', trials, horizon)
# Plot the results of the three runs. The first list carries the names
# 'Armpropose', 'ETC', 'PhasedETC'; the second carries 'AE-AGS', 'C-ETC',
# 'P-ETC' (presumably result identifiers and legend labels — confirm in plt).
# NOTE(review): 'none' is the third positional argument here but the last
# one in the run_plot_global_all call — confirm both orders against plt.
run_plot_unstable(
    ['Armpropose', 'ETC', 'PhasedETC'],
    ['AE-AGS', 'C-ETC', 'P-ETC'],
    'none',
    trials,
    horizon,
)
run_plot_global_all(
    ['Armpropose', 'ETC', 'PhasedETC'],
    ['AE-AGS', 'C-ETC', 'P-ETC'],
    trials,
    horizon,
    'none',
)
# run_plot_unstable(['ETC',], ['C-ETC'], 'none', trials, horizon)

# run_plot_varyDelta_regret(['Armpropose', 'ETC', 'PhasedETC'], ['AE-AGS', 'C-ETC', 'P-ETC'],[0.25, 0.2, 0.15, 0.1] , trials, horizon)
# run_plot_varyDelta_stable(['Armpropose', 'ETC', 'PhasedETC'], ['AE-AGS', 'C-ETC', 'P-ETC'],[0.25, 0.2, 0.15, 0.1] , trials, horizon)
# run_plot_varyN_regret(['Armpropose', 'ETC', 'PhasedETC'], ['AE-AGS', 'C-ETC', 'P-ETC'],[3, 6, 9, 12] , trials, horizon)
# run_plot_varyN_stable(['Armpropose', 'ETC', 'PhasedETC'], ['AE-AGS', 'C-ETC', 'P-ETC'],[3,6,9,12],trials, horizon)