# Star imports stay first so the explicit stdlib imports below cannot be
# shadowed by any same-named object those modules happen to export.
from games.stackRN import *
from common.variables import *
import copy
import os

# Build the Stackelberg game and run its initial simulation phase.
#
# Alternative hand-picked parameters, kept for quick experiments:
# theta_A = np.array([-1.0, 2.0, -3.0, -4.0, 5.0])
# theta_B = np.array([5.0, -2.0, 4.0, 4.0, -3.0])

theta_A, theta_B = THETA_A, THETA_B

game = StackelbergGame(
    n=5,
    theta_a=theta_A,
    theta_b=theta_B,
)

# Simulate with span_T=BURN_IN_T; the Monte Carlo sections further down
# deep-copy `game` in this post-simulation state for every run.
game.simulate(span_T=BURN_IN_T)

# Dump the recorded history and its length for a quick sanity check.
burn_in_history = game.get_history()
print(burn_in_history)
print(len(burn_in_history))

# UCB
# Runs MC_DISPLAY independent UCB-leader simulations, each on its own deep
# copy of `game`, and pickles the list of per-run cumulative rewards.

# Create the output directory before any simulation starts: otherwise a
# missing 'saved_data/' only surfaces as FileNotFoundError at the first
# open(..., 'wb') below, after all the simulation work has been done.
os.makedirs('saved_data', exist_ok=True)

leader_cumulative_rewards_ucb_mc = []

for _ in range(MC_DISPLAY):
    # Fresh copy so that one run cannot mutate the state seen by the next.
    game_ucb = copy.deepcopy(game)
    # NOTE: `ucb_leader` is intentionally left bound at module level; the
    # GISA section later in this script passes it as `ucb_obj`.
    ucb_leader = UCBLeaderN(game_ucb)
    total_reward_ucb = ucb_leader.simulate_ucb(horizon=BANDIT_T)
    print(f"Total reward (UCB leader): {total_reward_ucb}")

    # Collect this run's cumulative rewards for the UCB leader.
    cumulative_rewards_ucb = ucb_leader.get_cumulative_rewards()
    print(f"Cumulative rewards (UCB leader): {cumulative_rewards_ucb}")
    leader_cumulative_rewards_ucb_mc.append(cumulative_rewards_ucb)

with open('saved_data/leader_rn_cum_ucb.pkl', 'wb') as f:
    pickle.dump(leader_cumulative_rewards_ucb_mc, f)


# GISA
# Runs MC_DISPLAY GISA simulations, each on its own deep copy of `game`, and
# pickles the list of cumulative leader rewards (one array per run).
#
# NOTE(review): `ucb_obj` is the `ucb_leader` left bound by the last
# iteration of the UCB loop above, so every GISA run receives that same,
# already-simulated object. Confirm this sharing is intended.
leader_cumulative_rewards_gisa_mc = []
for _ in range(MC_DISPLAY):
    gisa_result = run_gisa_stack_rn(
        copy.deepcopy(game),
        bandit_t=BANDIT_T,
        ucb_obj=ucb_leader,
    )
    # Running sum of the leader's per-round rewards for this run.
    leader_cumulative_rewards_gisa_mc.append(
        np.cumsum(gisa_result['leader_rewards_history'])
    )

with open('saved_data/leader_rn_cum_gisa.pkl', 'wb') as out_file:
    pickle.dump(leader_cumulative_rewards_gisa_mc, out_file)

# Perfect-information benchmark.
# game.optimize_leader() returns a pair, unpacked here as (a_max, max_reward);
# presumably the leader's best action and the reward it yields — verify
# against the StackelbergGame implementation.
a_max, max_reward = game.optimize_leader()
print(a_max, max_reward)

# Cumulative curve obtained by collecting `max_reward` in each of the
# BANDIT_T rounds.
per_round_reward = np.ones(BANDIT_T) * max_reward
theo_cum = np.cumsum(per_round_reward)

# Echo the Monte Carlo results gathered by the earlier sections.
print("GISA:", leader_cumulative_rewards_gisa_mc)
print("UCB:", leader_cumulative_rewards_ucb_mc)

with open('saved_data/leader_rn_theo_cum.pkl', 'wb') as out_file:
    pickle.dump(theo_cum, out_file)
