import numpy as np
from math import *
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import sys, random, time
import matplotlib.ticker as mtick
from env import single_expert_dynamics,single_expert_stochastic_dynamics, expert1_reward, expert2_reward, expert3_reward, expert1_cost, expert2_cost, expert3_cost, feature1, feature2, feature3, expert_1_basis_constraint, expert_2_basis_constraint, expert_3_basis_constraint
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
from mpl_toolkits.axes_grid1.inset_locator import inset_axes



# Experiment-wide settings read by the loading and plotting code in this file.
iterations = 51   # length of the x-axis built with np.arange(0, iterations)
num_trials = 100  # number of trajectories handed to reward_cost_list
gamma = 0.9       # presumably a discount factor; not referenced in the visible code
num_action = 9    # size of the last axis of the reshaped policy arrays

def reward_cost_list(trajectories, num_data, horizon=30):
  """Accumulate the three experts' rewards and costs over each trajectory.

  `trajectories` is sliced as a 2-D array in which every trajectory occupies
  `horizon` consecutive rows. Within a row, columns 0-1, 2-3 and 4-5 are
  passed to experts 1, 2 and 3 as their state, and columns 6, 7 and 8 as
  their action.

  Args:
    trajectories: 2-D array supporting `trajectories[a:b, :]` slicing, with
      at least `horizon * num_data` rows and at least 9 columns.
    num_data: number of trajectories to evaluate.
    horizon: number of rows per trajectory (defaults to 30, the value that
      was previously hard-coded).

  Returns:
    A tuple `(reward_list, cost_list)`; each list has `num_data` entries,
    one summed reward / cost per trajectory. The element type is whatever
    the `expert*_reward` / `expert*_cost` functions return when added to a
    float (NOTE(review): likely 1x1 matrices -- confirm against env).

  Raises:
    IndexError: if a trajectory slice holds fewer than `horizon` rows.
  """
  # np.asmatrix is the function np.mat used to alias; np.mat itself was
  # removed from the main namespace in NumPy 2.0.
  omega1 = np.asmatrix([1.0, -1.0]).T
  omega2 = np.asmatrix([1.0, -1.0]).T
  omega3 = np.asmatrix([1.0, -1.0]).T
  theta1 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]).T
  theta2 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]).T
  # theta3 carries ten weights where theta1/theta2 carry eight.
  theta3 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]).T

  def as_column(values):
    # np.copy detaches the data from `trajectories` before it is wrapped.
    return np.asmatrix(np.copy(values)).T

  reward_list = []
  cost_list = []
  for i in range(num_data):
    reward = 0.0
    cost = 0.0
    single_trajectory = trajectories[horizon * i:horizon * (i + 1), :]
    # Index explicitly (rather than iterating the slice) so that a short
    # trajectory still raises instead of being silently truncated.
    for j in range(horizon):
      row = single_trajectory[j]
      state1 = as_column(row[0:2])
      state2 = as_column(row[2:4])
      state3 = as_column(row[4:6])
      action1 = as_column(row[6])
      action2 = as_column(row[7])
      action3 = as_column(row[8])
      reward = reward + (expert1_reward(omega1, state1, action1)
                         + expert2_reward(omega2, state2, action2)
                         + expert3_reward(omega3, state3, action3))
      cost = cost + (expert1_cost(theta1, state1, action1)
                     + expert2_cost(theta2, state2, action2)
                     + expert3_cost(theta3, state3, action3))
    reward_list.append(reward)
    cost_list.append(cost)
  return reward_list, cost_list


# Nominal policy of each expert: read the text file, then view the values as
# a (9, 9, num_action) array.
distribution1 = np.loadtxt("nominal_optimal_policy1_file.txt", dtype=float)
policy1 = np.reshape(distribution1, (9, 9, num_action))
distribution2 = np.loadtxt("nominal_optimal_policy2_file.txt", dtype=float)
policy2 = np.reshape(distribution2, (9, 9, num_action))
distribution3 = np.loadtxt("nominal_optimal_policy3_file.txt", dtype=float)
policy3 = np.reshape(distribution3, (9, 9, num_action))

# Nominal trajectories: 30 rows per trial and 9 columns per row, which is the
# layout reward_cost_list slices.
nominal_trajectories = np.loadtxt(
    "nominal_optimal_trajectory_file.txt", dtype=float
).reshape(30 * num_trials, 9)
nominal_reward_list, nominal_cost_list = reward_cost_list(
    nominal_trajectories, num_trials
)
nominal_constraint_violation = 1


# Divisor applied to every cost mean / standard deviation read below.
# NOTE(review): presumably 30 steps x 100 trials -- confirm with the code that
# wrote these files.
_COST_SCALE = 3000.0


def _load_series(filename, scale=1.0):
  """Read a text file of floats with np.loadtxt and divide it by `scale`."""
  # np.loadtxt already returns a fresh ndarray, so no extra np.array() copy.
  return np.loadtxt(filename, dtype=float) / scale


# Mean cost per outer iteration for each method.
cost1_mean = _load_series("cost1_mean_file.txt", _COST_SCALE)
cost2_mean = _load_series("cost2_mean_file.txt", _COST_SCALE)
cost3_mean = _load_series("cost3_mean_file.txt", _COST_SCALE)
cost4_mean = _load_series("cost4_mean_file.txt", _COST_SCALE)
centralized_cost_mean = _load_series("centralized_cost_mean_file.txt", _COST_SCALE)
# Constant reference lines; sized from `iterations` (instead of a literal 51)
# so they stay aligned with the plot's x-axis.
expert_cost = np.zeros(iterations)
expert_cost_nominal = np.full(iterations, nominal_constraint_violation)
cost_MCE = _load_series("MCE_cost_mean_file.txt", _COST_SCALE)
cost_ME = _load_series("ME_cost_mean_file.txt", _COST_SCALE)

# Standard deviation of the cost, scaled the same way as the means.
cost1_sd = _load_series("cost1_sd_file.txt", _COST_SCALE)
cost2_sd = _load_series("cost2_sd_file.txt", _COST_SCALE)
cost3_sd = _load_series("cost3_sd_file.txt", _COST_SCALE)
cost4_sd = _load_series("cost4_sd_file.txt", _COST_SCALE)
centralized_cost_sd = _load_series("centralized_cost_sd_file.txt", _COST_SCALE)

# False-positive series (used unscaled).
false1_positive = _load_series("false1_positive_file.txt")
false2_positive = _load_series("false2_positive_file.txt")
false3_positive = _load_series("false3_positive_file.txt")
false4_positive = _load_series("false4_positive_file.txt")
centralized_false_positive = _load_series("centralized_false_positive_file.txt")
ME_false_positive = _load_series("ME_centralized_false_positive_file.txt")
MCE_false_positive = _load_series("MCE_centralized_false_positive_file.txt")

# False-negative series (used unscaled).
false1_negative = _load_series("false1_negative_file.txt")
false2_negative = _load_series("false2_negative_file.txt")
false3_negative = _load_series("false3_negative_file.txt")
false4_negative = _load_series("false4_negative_file.txt")
centralized_false_negative = _load_series("centralized_false_negative_file.txt")
ME_false_negative = _load_series("ME_centralized_false_negative_file.txt")
MCE_false_negative = _load_series("MCE_centralized_false_negative_file.txt")



plt.rcParams.update({'font.size': 14})
axis = np.arange(0, iterations)

# (format string, keyword arguments) for each curve, in drawing order:
# methods 1-4, centralized, MCE, ME. Both y-axes use the same styles.
_CURVE_STYLES = (
    ('-', {'color': 'tab:blue'}),
    ('--', {'color': 'tab:orange'}),
    ('-.', {'color': 'tab:green'}),
    (':', {'color': 'tab:red'}),
    ('.', {'color': 'tab:purple', 'alpha': 0.5}),
    ('<-', {'color': 'tab:gray', 'alpha': 1.0, 'markersize': 3}),
    ('>-', {'color': 'tab:olive', 'alpha': 1.0, 'markersize': 3}),
)


def _draw_curves(target_axes, series_list):
  """Plot each series against `axis` on `target_axes` with its paired style."""
  for values, (fmt, style) in zip(series_list, _CURVE_STYLES):
    target_axes.plot(axis, values, fmt, **style)


# Left y-axis: false positives.
fig, ax = plt.subplots()
_draw_curves(ax, (
    false1_positive,
    false2_positive,
    false3_positive,
    false4_positive,
    centralized_false_positive,
    MCE_false_positive,
    ME_false_positive,
))
ax.set_xlabel('Outer iterations')

# Right y-axis (sharing the x-axis): false negatives.
ax1 = ax.twinx()
_draw_curves(ax1, (
    false1_negative,
    false2_negative,
    false3_negative,
    false4_negative,
    centralized_false_negative,
    MCE_false_negative,
    ME_false_negative,
))

plt.xticks(np.arange(0, iterations, 10))
fig.tight_layout()
plt.savefig('false_positive_negative.pdf')
plt.show()























