
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

# Global matplotlib defaults for this figure: canvas size, base font size and
# legend font size.
plt.rcParams.update({
    'figure.figsize': (9, 6),
    'font.size': 14,
    'legend.fontsize': 24,
})
# LaTeX text rendering and the serif family are left disabled.
#plt.rcParams['text.usetex'] = True
#plt.rcParams['font.family'] = 'serif'


# --- Experiment identifiers (these values are embedded in the result file names) ---
SCHEDULE = 25          # value of the "schedule" / "activate" field in file names
ARMS = 100             # value of the "arms" field in file names

BATCHSIZE = 5          # value of the "batchSize" field in file names
BETA = 0.999           # not referenced by the active code in this script
EPISODEEND = 1000      # last training episode shown on the x-axis
INTERVAL = 50          # spacing, in episodes, between consecutive x-axis points
RUNS = 200             # number of independent runs that are loaded and averaged
TIMELIMIT = 3000       # value of the "timeLimit" field in file names
REINFORCELR = 0.001    # only referenced by the disabled REINFORCE loader

# X-axis grid: 0, INTERVAL, 2*INTERVAL, ..., EPISODEEND (end point included).
numEpisode = np.arange(0, EPISODEEND + INTERVAL, step=INTERVAL)


# When True, NeurWIN results are read from (and the plot is written to) the
# "noisy_results" sub-directories.
NOISY = True

# Filled later with one reward column per run.
nnRewards = []


# Baseline results of the deadline index policy: one CSV, reward values in
# column index 1.
deadlineFileName = (
    f'../testResults/deadline_env/deadlineIndexResults_arms_{ARMS}'
    f'_timeLimit_{TIMELIMIT}_schedule_{SCHEDULE}.csv'
)
df = pd.read_csv(deadlineFileName)

# Use the same sample (the first RUNS rows) for both the mean and the
# percentile band. Previously the percentiles were taken over every row in the
# file while the mean only used the first RUNS rows, so the shaded band and
# the line could describe different data.
deadlineSample = df.iloc[:RUNS, 1]
deadline5Percentile, deadline95Percentile = np.percentile(deadlineSample, [5, 95])

# Scalar mean reward of the baseline, drawn later as a horizontal line.
# np.mean divides by the number of rows actually present, so a file with fewer
# than RUNS rows no longer yields an understated average.
deadlineRewards = np.mean(deadlineSample)

# Load the NeurWIN reward column (column index 1) of every run. Noisy results
# live in a "noisy_results" sub-directory; the file name itself is the same.
neurwinResultsDir = '../testResults/deadline_env'
if NOISY:
    neurwinResultsDir += '/noisy_results'

nnRewards = [
    pd.read_csv(
        f'{neurwinResultsDir}/nnIndexResults_arms_{ARMS}_batchSize_{BATCHSIZE}'
        f'_run_{run}_timeLimit_{TIMELIMIT}_schedule_{SCHEDULE}.csv'
    ).iloc[:, 1]
    for run in range(RUNS)
]

# Average across runs (axis 0 indexes the run) to get the mean curve.
nnVal = np.sum(nnRewards, axis=0) / RUNS

# After transposing, each row holds the values of all runs for one CSV row,
# so the percentiles below are taken across runs.
# NOTE(review): each CSV row presumably corresponds to one entry of
# numEpisode (the curve is plotted against it) - confirm against the writer.
nnRewards = np.transpose(nnRewards)

percentile5, percentile95 = np.percentile(nnRewards, [5, 95], axis=1)


# Disabled code: the bare string literal below is never executed. It holds the
# loaders for the Q-learning and REINFORCE result files, which follow the same
# pattern as the NeurWIN loader: read column index 1 of one CSV per run,
# average across runs, and take the 5th/95th percentiles across runs.
# To re-enable, remove the surrounding triple quotes.
'''
qLearningRewards = []

for i in range(RUNS):
    qLearningFileName = (f'../testResults/deadline_env/qLearningResults_arms_{ARMS}_run_{i}_schedule_{SCHEDULE}.csv')
    df = pd.read_csv(qLearningFileName)
    runReward = df.iloc[:, 1]
    qLearningRewards.append(runReward)



qLearningVals = np.sum(qLearningRewards, 0) / RUNS

qLearningRewards = np.transpose(qLearningRewards)


qLearningpercentile5 = np.percentile(qLearningRewards, 5, axis=1)
qLearningpercentile95 = np.percentile(qLearningRewards, 95, axis=1)


reinforceRewards = []

for i in range(RUNS):
    reinforceFileName = (f'../testResults/deadline_env/reinforceResults_arms_{ARMS}_batchSize_{BATCHSIZE}\
_lr_{REINFORCELR}_run_{i}_schedule_{SCHEDULE}.csv')
    df = pd.read_csv(reinforceFileName)
    runReward = list(df.iloc[:,1])
    reinforceRewards.append(runReward)

reinforceVals = np.sum(reinforceRewards, 0) / RUNS

reinforcePercentileRewards = np.transpose(reinforceRewards)


percentile5Reinforce = np.percentile(reinforcePercentileRewards, 5, axis=1)
percentile95Reinforce = np.percentile(reinforcePercentileRewards, 95, axis=1)
'''

# --- Curves -----------------------------------------------------------------
# Mean NeurWIN reward across runs.
plt.plot(
    numEpisode, nnVal,
    label='NeurWIN', color='b', linewidth=3.0, linestyle='solid',
)

# Disabled baselines; they need the Q-learning / REINFORCE loaders to be enabled.
#plt.plot(numEpisode, qLearningVals, label=f'QWIC', color='g', linewidth=3.0, linestyle='dashed')
#plt.plot(numEpisode, reinforceVals, label=f'REINFORCE', color='k', linewidth=3.0, linestyle='dotted')

# The deadline index baseline is a single scalar, drawn as a horizontal line
# spanning the x-range.
plt.hlines(
    y=deadlineRewards, xmin=0, xmax=EPISODEEND,
    label='Deadline Whittle Index', color='r', linewidth=3.0, linestyle='dashdot',
)

# --- 5th-95th percentile bands (unlabelled, so absent from the legend) --------
#plt.fill_between(x=numEpisode,y1=qLearningpercentile5, y2=qLearningpercentile95, alpha=0.3, color='teal')#, label='Q-learning confidence bound')
plt.fill_between(
    x=numEpisode, y1=deadline5Percentile, y2=deadline95Percentile,
    alpha=0.3, color='orange',
)
plt.fill_between(
    x=numEpisode, y1=percentile5, y2=percentile95,
    alpha=0.3, color='green',
)
#plt.fill_between(x=numEpisode,y1=percentile5Reinforce, y2=percentile95Reinforce, alpha=0.3, color='slateblue')#, label='REINFORCE confidence bound')
plt.legend()

# --- Axes ---------------------------------------------------------------------
plt.xlabel('Number of Training Episodes', fontsize=24)
plt.ylabel('Total Discounted Rewards', fontsize=24)
plt.yticks(rotation=60)

# --- Output -------------------------------------------------------------------
# Noisy runs are saved to their own sub-directory under the same file name.
plotDir = '../plotResults/deadline_results'
if NOISY:
    plotDir = f'{plotDir}/noisy_results'
plt.savefig(f'{plotDir}/deadline_scheduling_training_values_{ARMS}_activate_{SCHEDULE}.pdf')
plt.show()




