import unittest
from src.searchlight.gameplay.simulators import GameSimulator
from src.searchlight.gameplay.agents import MCTSAgent, HumanAgent
from src.searchlight.classic_models import RandomRolloutValueHeuristic, ZeroValueHeuristic
from src.searchlightimprove.llm_utils.llm_api_models import GPT35Multi
from src.GOPS.baseline_models_GOPS import *

# --- Game configuration -----------------------------------------------------
# Number of cards handed to GOPSState.init_from_num_cards, and the ids of the
# two competing players.
num_cards = 6
players = {0, 1}

# Initial game state built from the configured card count.
start_state = GOPSState.init_from_num_cards(num_cards)

# GOPS-specific components that are shared by the simulator and the agents.
actor_action_enumerator = GOPSActorActionEnumerator()
forward_transitor = GOPSForwardTransitor()
information_function = GOPSInformationFunction()
# Callable handed to the human agent together with its player id and rng.
action_parser = GOPSActorActionEnumerator.parse_str_to_action

# Simulator that plays games from `start_state` using the components above.
simulator = GameSimulator(
    transitor=forward_transitor,
    actor_action_enumerator=actor_action_enumerator,
    information_function=information_function,
    start_state=start_state,
)
information_prior = GOPSInformationPrior()

# Value heuristic passed to the MCTS bot below.
value_heuristic = ZeroValueHeuristic()

# One seeded generator is shared by every agent.
# NOTE(review): `np` and `RandomAgent` are not imported explicitly in this
# file; they presumably arrive through the wildcard import of
# src.GOPS.baseline_models_GOPS -- confirm.
rng = np.random.default_rng(12)

# Player 1 is controlled by a human; every other player id gets an MCTS bot.
agents = {}
for player in players:
    if player != 1:
        agents[player] = MCTSAgent(
            players=players,
            player=player,
            actor_action_enumerator=actor_action_enumerator,
            forward_transitor=forward_transitor,
            value_heuristic=value_heuristic,
            num_rollout=100,
            node_budget=100,
            rng=rng,
            information_function=information_function,
            information_prior=information_prior,
        )
        continue
    agents[player] = HumanAgent(player, action_parser, rng)

# Key -1 is served by a random agent.
# NOTE(review): presumably -1 is the environment/chance actor -- confirm
# against GameSimulator.
agents[-1] = RandomAgent(rng=rng)
print(agents)
# Play the configured number of games with the simulator's display turned off.
num_games = 1
avg_scores, trajectories = simulator.simulate_games(
    agents,
    num_games,
    display=False,
)

# Index 1 is the human agent and index 0 is the MCTS bot (see agent setup).
human_score = avg_scores[1]
bot_score = avg_scores[0]

# Announce the winner; anything that is neither strictly greater nor strictly
# smaller is reported as a tie.
if human_score > bot_score:
    outcome = "Human wins!"
elif human_score < bot_score:
    outcome = "Bot wins!"
else:
    outcome = "Tie!"
print(outcome)

# Report both final scores.
print(f"Final human score: {avg_scores[1]}, final bot score: {avg_scores[0]}")