import os
os.environ["OMP_NUM_THREADS"] = "1"
import numpy
from sgcrl.utils.imports import get_arguments, get_class, instantiate_class
from sgcrl.utils.gym import Bot, PytorchD4RLGymEnv

import time
from tqdm import tqdm

def single_evaluation(model_db, env_yaml, cfg, logger, idx_model):
    """Evaluate the most recent model in ``model_db`` once and log the averages.

    Builds a ``PytorchD4RLGymEnv`` from ``env_yaml``, runs ``cfg.n_episodes``
    episodes with the bot stored last under the ``"model"`` key, and writes the
    per-episode averages to ``logger``.

    Args:
        model_db: Model store exposing ``size("model")`` and
            ``get("model", index)``; the retrieved object must be a ``Bot``.
        env_yaml: Environment description passed to ``get_class`` and
            ``get_arguments`` to build the environment.
        cfg: Configuration object; this function reads ``render``,
            ``n_episodes``, ``bot_args`` and ``reward_variable``.
        logger: Object exposing ``add_scalar(tag, value, step)``.
        idx_model: Currently ignored, see the NOTE(review) in the body.

    Logged scalars (the step is the index of the evaluated model):
        ``avg_reward``, ``avg_reward_subgoal``, ``avg_length`` and, when at
        least one episode carried a ``"normalized_score"`` entry,
        ``norm_score``.
    """
    env = PytorchD4RLGymEnv(get_class(env_yaml), **get_arguments(env_yaml))
    #env = PytorchGymnasiumEnv(get_class(env_yaml), **get_arguments(env_yaml))

    if cfg.render:
        env.set_render(True)

    # NOTE(review): the caller-supplied idx_model is overwritten here, so the
    # function always evaluates the latest stored model and uses that index as
    # the logging step. Kept as-is to preserve behavior -- confirm this is
    # intended. (The former `assert idx_model < size` was always true after
    # this assignment and has been dropped.)
    idx_model = model_db.size("model") - 1

    bot = model_db.get("model", idx_model)
    assert isinstance(bot, Bot)

    rewards = []
    subgoals_rewards = []
    scores = []
    lengths = []
    with tqdm(range(cfg.n_episodes), total=cfg.n_episodes, desc="Evaluating model") as pbar:
        for episode_idx in pbar:
            # The episode index doubles as the seed, so every evaluation runs
            # the same sequence of seeds 0..n_episodes-1.
            episode = env.gather_episode(
                bot=bot, seed=episode_idx, bot_args=cfg.bot_args
            )
            if "normalized_score" in episode.keys():
                scores.append(episode["normalized_score"])

            episode_rewards = episode[cfg.reward_variable]
            episode_return = episode_rewards.sum().item()
            rewards.append(episode_return)
            lengths.append(len(episode_rewards))
            # An episode counts as a (sub)goal success when the bot ended in
            # phase 2, even if the environment return itself is lower.
            subgoals_rewards.append(max(int(bot.current_phase == 2), episode_return))

            pbar.set_postfix(reward=numpy.mean(rewards), subgoal_reward=numpy.mean(subgoals_rewards), length=numpy.mean(lengths))

    print(f"reward = {numpy.mean(rewards)}", end='')
    logger.add_scalar("avg_reward", numpy.mean(rewards), idx_model)
    logger.add_scalar("avg_reward_subgoal", numpy.mean(subgoals_rewards), idx_model)
    logger.add_scalar("avg_length", numpy.mean(lengths), idx_model)
    # Test the collected scores rather than the last episode: this also works
    # when no episode was run at all.
    if scores:
        # Log the normalized scores themselves (previously the raw rewards
        # were written under this tag by mistake).
        logger.add_scalar("norm_score", numpy.mean(scores), idx_model)
        print(f" normalized score {numpy.mean(scores)}", end='')
    print("")



def evaluation_loop(model_db, env_yaml, cfg, logger):
    """Evaluate every model appearing in ``model_db``, in order, then exit.

    Hides CUDA devices from this process, builds one ``PytorchD4RLGymEnv``
    from ``env_yaml`` and then, for each model index starting at 0, waits
    (polling once per second) until that model exists in ``model_db``, runs
    ``cfg.n_episodes`` episodes with it and writes the averages to ``logger``
    using the model index as the step.

    Args:
        model_db: Model store exposing ``size("model")`` and
            ``get("model", index)``; every retrieved object must be a ``Bot``.
        env_yaml: Environment description passed to ``get_class`` and
            ``get_arguments`` to build the environment.
        cfg: Configuration object; this function reads ``n_episodes``,
            ``bot_args``, ``reward_variable`` and ``max_db_size``.
        logger: Object exposing ``add_scalar(tag, value, step)``.

    Raises:
        SystemExit: With code 0 once ``cfg.max_db_size + 1`` models have been
            evaluated. The function never returns normally.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    env = PytorchD4RLGymEnv(get_class(env_yaml), **get_arguments(env_yaml))

    # Index of the next model to evaluate; also the number of models already
    # evaluated and the step used for logging.
    idx_model = 0
    while True:
        # Block until the model with index `idx_model` has been stored.
        while model_db.size("model") == idx_model:
            time.sleep(1.0)
        bot = model_db.get("model", idx_model)
        assert isinstance(bot, Bot)

        rewards = []
        subgoals_rewards = []
        scores = []
        lengths = []
        with tqdm(range(cfg.n_episodes), total=cfg.n_episodes, desc="Evaluating model") as pbar:
            for episode_idx in pbar:
                # The episode index doubles as the seed, so every model is
                # evaluated on the same sequence of seeds.
                episode = env.gather_episode(
                    bot=bot, seed=episode_idx, bot_args=cfg.bot_args
                )
                if "normalized_score" in episode.keys():
                    scores.append(episode["normalized_score"])

                episode_rewards = episode[cfg.reward_variable]
                episode_return = episode_rewards.sum().item()
                rewards.append(episode_return)
                lengths.append(len(episode_rewards))
                # An episode counts as a (sub)goal success when the bot ended
                # in phase 2, even if the environment return itself is lower.
                subgoals_rewards.append(max(int(bot.current_phase == 2), episode_return))

                pbar.set_postfix(reward=numpy.mean(rewards), subgoal_reward=numpy.mean(subgoals_rewards), length=numpy.mean(lengths))

        print(f"reward = {numpy.mean(rewards)}", end='')
        logger.add_scalar("avg_reward", numpy.mean(rewards), idx_model)
        logger.add_scalar("avg_reward_subgoal", numpy.mean(subgoals_rewards), idx_model)
        logger.add_scalar("avg_length", numpy.mean(lengths), idx_model)
        # Test the collected scores rather than the last episode: this also
        # works when no episode was run at all.
        if scores:
            # Log the normalized scores themselves (previously the raw rewards
            # were written under this tag by mistake).
            logger.add_scalar("norm_score", numpy.mean(scores), idx_model)
            print(f" normalized score {numpy.mean(scores)}", end='')
        print("")

        idx_model += 1

        if idx_model == cfg.max_db_size + 1:  # + 1 to account for the model__0 (random bot)
            print("Evaluation done")
            # Raise SystemExit directly: the `exit` builtin is an interactive
            # helper installed by the `site` module and may be unavailable.
            raise SystemExit(0)

