import copy
import os
import pickle

import numpy as np

from games.npg import *
from common.variables import *
from common.helper_funcs import *
from common.randomization_tools import *
from ol.oful_proj import *
from games.stackR1 import *

# Build the Stackelberg game instance shared by the rest of this script.
# NOTE(review): alpha_2 receives ALPHA_1_S1, the same constant as alpha_1,
# whereas theta_2 has its own THETA_2_S1 -- confirm this is intentional and
# not a copy-paste slip.
game = StackelbergGame(
    theta_1=THETA_1_S1,
    theta_2=THETA_2_S1,
    alpha_1=ALPHA_1_S1,
    alpha_2=ALPHA_1_S1,
    lambda_=LAMBDA_S1,
)

# Simulate with span_T=BURN_IN_T (presumably a burn-in phase that populates
# the game's history before any learner runs), then dump the recorded
# history and its length for a quick visual sanity check.
game.simulate(span_T=BURN_IN_T)
print(game.get_history())
print(len(game.get_history()))

# Monte Carlo evaluation of the UCB leader: MC_DISPLAY runs, each with
# horizon BANDIT_T. One cumulative-reward record is collected per run.
leader_cumulative_rewards_ucb_mc = []

for m in range(MC_DISPLAY):
    # NOTE(review): every run wraps the same `game` object, whereas the GISA
    # loop later in this script works on a deep copy per run. If
    # simulate_ucb mutates the game, these runs are not independent --
    # confirm against UCBLeader.
    ucb_leader = UCBLeader(game)
    total_reward_ucb = ucb_leader.simulate_ucb(horizon=BANDIT_T)
    print(f"Total reward (UCB leader): {total_reward_ucb}")

    cumulative_rewards_ucb = ucb_leader.get_cumulative_rewards()

    print(f"Cumulative rewards (UCB leader): {cumulative_rewards_ucb}")
    leader_cumulative_rewards_ucb_mc.append(cumulative_rewards_ucb)

# Create the output directory up front: open(..., 'wb') does not create
# missing parent directories, and failing here would throw away every
# simulation result computed above.
os.makedirs('saved_data', exist_ok=True)
with open('saved_data/leader_r1_cum_ucb.pkl', 'wb') as f:
    pickle.dump(leader_cumulative_rewards_ucb_mc, f)

# Monte Carlo evaluation of GISA: MC_DISPLAY runs, collecting the
# 'leader_cumulative_rewards' entry of each run's result.
leader_cumulative_rewards_gisa_mc = []
for m in range(MC_DISPLAY):
    # Each run operates on its own deep copy, so whatever run_gisa_stack_r1
    # does to the game cannot leak into `game` or into the next run.
    game_r1 = copy.deepcopy(game)
    # NOTE(review): `ucb_leader` is whatever object the earlier UCB loop left
    # bound (its last iteration), and the same instance is passed to every
    # GISA run -- confirm that sharing it across runs is intended.
    gisa_obj = run_gisa_stack_r1(game_r1, ucb_obj=ucb_leader)
    leader_cumulative_rewards_gisa = gisa_obj['leader_cumulative_rewards']
    leader_cumulative_rewards_gisa_mc.append(leader_cumulative_rewards_gisa)

# Create the output directory up front: open(..., 'wb') does not create
# missing parent directories, and failing here would throw away every
# simulation result computed above.
os.makedirs('saved_data', exist_ok=True)
with open('saved_data/leader_r1_cum_gisa.pkl', 'wb') as f:
    pickle.dump(leader_cumulative_rewards_gisa_mc, f)

# Baseline: the leader's per-round reward when playing the action returned
# by game.optimize_leader(), computed as that action times the game's best
# response to it.
a_max = game.optimize_leader()
max_reward = a_max * game.best_response(a_max)
print(max_reward)

# Cumulative baseline over BANDIT_T rounds (the same per-round reward every
# round), giving a curve to compare against the learners' cumulative rewards.
theoretical_cum_max = np.cumsum(np.ones(BANDIT_T) * max_reward)
print(theoretical_cum_max)

# Create the output directory up front: open(..., 'wb') does not create
# missing parent directories, so a missing 'saved_data' would otherwise
# raise FileNotFoundError here.
os.makedirs('saved_data', exist_ok=True)
with open('saved_data/leader_r1_theo_cum.pkl', 'wb') as f:
    pickle.dump(theoretical_cum_max, f)
