import os
import datetime
from abc import ABC, abstractmethod
from environment.road_keypoints import RoadKeypoints
from models.muzero_models import calculate_support_scaling_factor


class AbstractGame(ABC):
    """
    Interface a game has to implement so that MuZero can play it.
    """

    @abstractmethod
    def __init__(self, seed=None):
        pass

    @abstractmethod
    def step(self, action):
        """
        Apply action to the game.

        Args:
            action : action of the action_space to take.

        Returns:
            The new observation, the reward and a boolean if the game has ended.
        """
        pass

    def to_play(self):
        """
        Return the current player.

        The default implementation always answers 0, which is enough for
        single-player games. Multiplayer games should override it.

        Returns:
            The identifier of the player whose turn it is.
        """
        return 0

    @abstractmethod
    def legal_actions(self):
        """
        Should return the legal actions at each turn. If they are not available, it can return
        the whole action space. At each turn, the game has to be able to handle any of the
        returned actions.

        For complex games where calculating legal moves takes too long, the idea is to define
        the legal actions equal to the action space but to return a negative reward if the
        action is illegal.

        Returns:
            An array of integers, subset of the action space.
        """
        pass

    @abstractmethod
    def reset(self):
        """
        Reset the game for a new game.

        Returns:
            Initial observation of the game.
        """
        pass

    def close(self):
        """
        Properly close the game. The default implementation does nothing.
        """
        pass

    @abstractmethod
    def render(self):
        """
        Display the game observation.
        """
        pass

    def human_to_action(self):
        """
        For multiplayer games, ask the user for a legal action
        and return the corresponding action number.

        The prompt is repeated until the answer parses as an integer that is
        contained in ``self.legal_actions()``.

        Returns:
            An integer from the action space.
        """
        choice = input(f"Enter the action to play for the player {self.to_play()}: ")
        while True:
            try:
                action = int(choice)
            except ValueError:
                # Non-numeric (or empty) input is treated like any other
                # illegal action: ask again instead of crashing.
                action = None
            if action is not None and action in self.legal_actions():
                return action
            choice = input("Ilegal action. Enter another action : ")

    def expert_agent(self):
        """
        Hard coded agent that MuZero faces to assess its progress in multiplayer games.
        It doesn't influence training.

        Returns:
            Action as an integer to take in the current game state.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    def action_to_string(self, action_number):
        """
        Convert an action number to a string representing the action.

        Args:
            action_number: an integer from the action space.

        Returns:
            String representing the action.
        """
        return str(action_number)


class Game(AbstractGame):
    """
    Adapter that exposes a RoadKeypoints environment through the AbstractGame interface.
    """

    def __init__(self, game_config, seed=None):
        """
        Create the wrapped environment.

        Args:
            game_config: mapping whose items are forwarded as keyword arguments to RoadKeypoints.
            seed: optional seed passed to the environment; nothing is seeded when it is None.
        """
        environment = RoadKeypoints(**game_config)
        self.env = environment
        if seed is not None:
            environment.seed(seed)

    def step(self, action):
        """
        Forward an action to the environment.

        Args:
            action : action of the action_space to take.

        Returns:
            A tuple (observation, reward, done); the fourth value returned by
            the environment is dropped.
        """
        outcome = self.env.step(action)
        observation, reward, done, _ = outcome
        return (observation, reward, done)

    def legal_actions(self):
        """
        Return whatever the environment reports as its legal actions.
        """
        environment = self.env
        return environment.legal_actions()

    def reset(self, idx=None):
        """
        Reset the environment for a new game.

        Args:
            idx: forwarded unchanged to the environment's reset as the ``idx`` keyword.

        Returns:
            The value returned by the environment's reset.
        """
        environment = self.env
        return environment.reset(idx=idx)

    def close(self):
        """
        Close the wrapped environment.
        """
        environment = self.env
        environment.close()

    def render(self):
        """
        Render the wrapped environment and return its result.
        """
        environment = self.env
        return environment.render()


class BaseConfig:
    """
    Default hyper-parameters and output location.
    """

    def __init__(self, output_folder_name="", game_name=""):
        """
        Populate the default configuration values.

        Args:
            output_folder_name: prefix of the timestamped results directory name.
            game_name: sub-directory of the results folder.
        """
        self.seed = 0

        # Value and reward are scaled by an invertible transform
        self.support_size = 300

        # Upper bounds used for the scaling in the support; both are larger
        # than the maximum value and reward that can be attained.
        self.max_possible_value = 1.6
        self.max_possible_reward = 1.6

        # Linear scaling is used since it seems to work better in our case:
        # rewards are only small increments compared to most atari games.
        value_scaling = calculate_support_scaling_factor(
            self.support_size, self.max_possible_value
        )
        self.support_scaling_factor_value = value_scaling
        reward_scaling = calculate_support_scaling_factor(
            self.support_size, self.max_possible_reward
        )
        self.support_scaling_factor_reward = reward_scaling

        # Chronological discount of the reward
        self.discount = 1
        self.temperature_threshold = None

        # Root prior exploration noise
        self.root_dirichlet_alpha = 0.25
        self.root_exploration_fraction = 0.25

        # UCB formula
        self.pb_c_base = 19652
        self.pb_c_init = 1.25

        # Lets the same state be reached from different paths in the search tree
        self.tree_is_dag = False
        self.use_last_model_value = True

        # Path to store the model weights and TensorBoard logs:
        # <directory of this file>/../results/<game_name>/<output_folder_name><timestamp>
        module_dir = os.path.dirname(os.path.realpath(__file__))
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
        run_folder = output_folder_name + timestamp
        self.results_path = os.path.join(
            module_dir, "../results", game_name, run_folder
        )

        # Adjust the self play / training ratio to avoid over/underfitting
        self.self_play_delay = 0  # Number of seconds to wait after each played game
        self.training_delay = 0  # Number of seconds to wait after each training step
        self.ratio = None  # Desired training steps per self played step ratio.
