from zoo.memory.config.memory_muzero_config import main_config, create_config
from lzero.entry import eval_muzero
import numpy as np

if __name__ == "__main__":
    """
    Overview:
        Evaluate the model performance by running multiple episodes with different seeds using the MuZero algorithm.
        The evaluation results (returns and mean returns) are printed out for each seed and summarized for all seeds. 
    Variables:
        - model_path (:obj:`str`): Path to the pretrained model's checkpoint file. Usually something like 
          "exp_name/ckpt/ckpt_best.pth.tar". Absolute path is recommended.
        - seeds (:obj:`List[int]`): List of seeds to use for evaluation. Each seed will run for a specified number 
          of episodes.
        - num_episodes_each_seed (:obj:`int`): Number of episodes to be run for each seed.
        - main_config (:obj:`EasyDict`): Main configuration for the evaluation, imported from the model's config file.
        - returns_mean_seeds (:obj:`List[float]`): List to store the mean returns for each seed.
        - returns_seeds (:obj:`List[List[float]]`): List to store the returns for each episode from each seed.
    Outputs:
        Prints out the mean returns and returns for each seed, along with the overall mean return across all seeds.

    .. note::
        The eval_muzero function is used here for evaluation. For more details about this function and its parameters, 
        please refer to its own documentation.
    """
    # model_path = './ckpt/ckpt_best.pth.tar'
    model_path = None
    # Initialize a list with a single seed for the experiment
    seeds = [0]

    # Set the number of episodes to run for each seed
    num_episodes_each_seed = 1

    # Specify the number of environments for the evaluator to use
    main_config.env.evaluator_env_num = 1

    # Set the number of episodes for the evaluator to run
    main_config.env.n_evaluator_episode = 1

    # The total number of test episodes is the product of the number of episodes per seed and the number of seeds
    total_test_episodes = num_episodes_each_seed * len(seeds)

    # Uncomment the following lines to save a replay of the episodes as an mp4 video
    # main_config.env.replay_path = './video'

    # Enable saving of replay as a gif, specify the path to save the replay gif
    main_config.env.save_replay_gif = True
    main_config.env.replay_path_gif = './video'

    # Initialize lists to store the mean and total returns for each seed
    returns_mean_seeds = []
    returns_seeds = []

    # For each seed, run the evaluation function and store the resulting mean and total returns
    for seed in seeds:
        returns_mean, returns = eval_muzero(
            [main_config, create_config],  # Configuration parameters for the evaluation
            seed=seed,  # The seed for the random number generator
            num_episodes_each_seed=num_episodes_each_seed,  # The number of episodes to run for this seed
            print_seed_details=False,  # Whether to print detailed information for each seed
            model_path=model_path  # The path to the trained model to be evaluated
        )
        # Append the mean and total returns to their respective lists
        returns_mean_seeds.append(returns_mean)
        returns_seeds.append(returns)

    # Convert the lists of returns to numpy arrays for easier statistical analysis
    returns_mean_seeds = np.array(returns_mean_seeds)
    returns_seeds = np.array(returns_seeds)

    # Print evaluation results
    print("=" * 20)
    print(f"We evaluated a total of {len(seeds)} seeds. For each seed, we evaluated {num_episodes_each_seed} episode(s).")
    print(f"For seeds {seeds}, the mean returns are {returns_mean_seeds}, and the returns are {returns_seeds}.")
    print("Across all seeds, the mean reward is:", returns_mean_seeds.mean())
    print("=" * 20)