import collections
import time
import re
import gymnasium
import numpy as np
from gymnasium.spaces import Box

import ogbench
from utils.datasets import Dataset, GCDataset

class EpisodeMonitor(gymnasium.Wrapper):
    """Gymnasium wrapper that tracks per-episode statistics and reports them via ``info``.

    On every step ``info['total']['timesteps']`` holds the number of steps taken
    through this wrapper since construction. On the step that ends an episode,
    ``info['episode']`` additionally holds the final reward, the summed reward,
    the episode length and the wall-clock duration since the last reset.
    """

    def __init__(self, env, filter_regexes=None):
        """Wrap ``env``.

        Args:
            env: Environment to wrap.
            filter_regexes: Optional iterable of regex patterns; any ``info`` key
                matched by ``re.match`` against one of them is deleted on each step.
        """
        super().__init__(env)
        self._reset_stats()
        self.total_timesteps = 0
        self.filter_regexes = [] if filter_regexes is None else filter_regexes

    def _reset_stats(self):
        """Zero the per-episode accumulators and restart the episode timer."""
        self.reward_sum = 0.0
        self.episode_length = 0
        self.start_time = time.time()

    def step(self, action):
        """Step the wrapped environment and attach monitoring data to ``info``."""
        observation, reward, terminated, truncated, info = self.env.step(action)

        # Drop info entries that are not needed for logging.
        for pattern in self.filter_regexes:
            unwanted = [name for name in info if re.match(pattern, name) is not None]
            for name in unwanted:
                del info[name]

        self.reward_sum += reward
        self.episode_length += 1
        self.total_timesteps += 1
        info['total'] = {'timesteps': self.total_timesteps}

        if not (terminated or truncated):
            return observation, reward, terminated, truncated, info

        # Episode finished: summarize it.
        episode_stats = {
            'final_reward': reward,
            'return': self.reward_sum,
            'length': self.episode_length,
            'duration': time.time() - self.start_time,
        }
        info['episode'] = episode_stats

        if hasattr(self.unwrapped, 'get_normalized_score'):
            score = self.unwrapped.get_normalized_score(episode_stats['return'])
            episode_stats['normalized_return'] = score * 100.0

        return observation, reward, terminated, truncated, info

    def reset(self, *args, **kwargs):
        """Reset the per-episode statistics, then reset the wrapped environment."""
        self._reset_stats()
        return self.env.reset(*args, **kwargs)

class FrameStackWrapper(gymnasium.Wrapper):
    """Gymnasium wrapper that returns the last ``num_stack`` observations concatenated.

    Observations are joined along the last axis, and the observation space
    bounds are widened accordingly.
    """

    def __init__(self, env, num_stack):
        """Wrap ``env`` so that observations are stacks of ``num_stack`` frames."""
        super().__init__(env)

        self.num_stack = num_stack
        self.frames = collections.deque(maxlen=num_stack)

        # Repeat the single-frame bounds once per stacked frame.
        base_space = self.observation_space
        stacked_low = np.concatenate([base_space.low for _ in range(num_stack)], axis=-1)
        stacked_high = np.concatenate([base_space.high for _ in range(num_stack)], axis=-1)
        self.observation_space = Box(low=stacked_low, high=stacked_high, dtype=base_space.dtype)

    def get_observation(self):
        """Return the buffered frames concatenated along the last axis."""
        assert len(self.frames) == self.num_stack
        return np.concatenate(tuple(self.frames), axis=-1)

    def reset(self, **kwargs):
        """Reset the wrapped environment and fill the buffer with the first frame."""
        first_ob, info = self.env.reset(**kwargs)
        # The deque is bounded, so this overwrites any frames from a previous episode.
        self.frames.extend([first_ob] * self.num_stack)
        if 'goal' in info:
            # Stack the goal the same way so it lines up with the stacked observation.
            repeated_goal = [info['goal'] for _ in range(self.num_stack)]
            info['goal'] = np.concatenate(repeated_goal, axis=-1)
        return self.get_observation(), info

    def step(self, action):
        """Step the wrapped environment and return the updated frame stack."""
        new_ob, reward, terminated, truncated, info = self.env.step(action)
        self.frames.append(new_ob)
        stacked = self.get_observation()
        return stacked, reward, terminated, truncated, info

def normalize_data(input_data):
    """Min-max normalize the first two columns of a 2-D array to [0, 1].

    Columns 0 and 1 are rescaled independently using their own minimum and
    maximum. Any further columns are left as zeros in the output.

    Args:
        input_data: Array-like indexed as ``input_data[:, 0]`` / ``input_data[:, 1]``.

    Returns:
        A new array with the same shape as ``input_data``. Integer and boolean
        inputs produce a float64 result (so the fractional values are not
        truncated); other dtypes are preserved.
    """
    input_data = np.asarray(input_data)
    # An integer output buffer would truncate every normalized value to 0 or 1.
    out_dtype = np.float64 if input_data.dtype.kind in 'biu' else input_data.dtype
    normalized_observations = np.zeros(input_data.shape, dtype=out_dtype)

    for col in (0, 1):  # 0: x, 1: y
        column = input_data[:, col].astype(out_dtype)
        col_min, col_max = column.min(), column.max()
        span = col_max - col_min
        if span == 0:
            # Constant column: avoid 0/0 and map every entry to 0.
            span = 1
        normalized_observations[:, col] = (column - col_min) / span

    return normalized_observations
    
def make_env_and_datasets(dataset_name, frame_stack=None, action_clip_eps=1e-5, context_len=None, number_of_meta_envs=None):
    """Make the training/evaluation environments and datasets for a named task.

    The task family is selected by substring match on ``dataset_name``:
    ``'ogbench'``, ``'fourrooms-dynamics'``, ``'fourrooms-vanilla'``,
    ``'gridworld'`` or ``'doors-dynamics'``.

    Args:
        dataset_name: Name of the dataset. For OGBench names the first
            ``-``-separated token is stripped before the name is passed to
            ``ogbench.make_env_and_datasets``. For ``'gridworld'`` the last
            ``-``-separated token is passed to ``Maze`` as ``size``.
        frame_stack: Number of frames to stack, or None to disable stacking.
        action_clip_eps: If not None, dataset actions are clipped to
            ``[-1 + action_clip_eps, 1 - action_clip_eps]``.
        context_len: Passed as ``max_steps`` to the doors environments
            (used by ``'doors-dynamics'`` only).
        number_of_meta_envs: Interpolated into the dataset file name for
            ``'fourrooms-dynamics'`` and ``'doors-dynamics'``.

    Returns:
        A tuple ``(env, eval_env, train_dataset, val_dataset)``. For every
        non-OGBench task ``val_dataset`` is built from ``train_dataset``.

    Raises:
        ValueError: If ``dataset_name`` matches none of the supported tasks.
    """
    requested_name = dataset_name
    env = eval_env = train_dataset = val_dataset = None

    def monitored(raw_env):
        # Same logging filter for every monitored environment.
        return EpisodeMonitor(raw_env, filter_regexes=['.*privileged.*', '.*proprio.*'])

    if 'ogbench' in dataset_name:
        # Only this branch needs the visualization wrapper, so import it here.
        from envs.ogbench.ant_utils import MazeVizWrapper

        dataset_name = "-".join(dataset_name.split("-")[1:])
        env, train_dataset, val_dataset = ogbench.make_env_and_datasets(dataset_name, compact_dataset=False)
        eval_env = ogbench.make_env_and_datasets(dataset_name, env_only=True)

        env = monitored(env)
        eval_env = monitored(eval_env)
        eval_env = MazeVizWrapper(eval_env)  # for visualizations

        train_dataset = Dataset.create(**train_dataset)
        val_dataset = Dataset.create(**val_dataset)

    if 'fourrooms-dynamics' in dataset_name:
        from envs.custom_mazes.darkroom import FourRoomsMazeEnv, Maze

        env = FourRoomsMazeEnv(Maze(seed=0, maze_type='fourrooms_random_layouts'))
        eval_env = FourRoomsMazeEnv(Maze(seed=0, maze_type='fourrooms_random_layouts'))
        env = monitored(env)
        eval_env = monitored(eval_env)
        # The number in the file name is the number of training layouts.
        train_dataset = np.load(f"aux_data/fourrooms_meta{number_of_meta_envs}_data.npy", allow_pickle=True)[()]
        train_dataset = Dataset.create(**train_dataset)
        val_dataset = train_dataset

    if 'fourrooms-vanilla' in dataset_name:
        from envs.custom_mazes.darkroom import FourRoomsMazeEnv, Maze

        env = FourRoomsMazeEnv(Maze())
        eval_env = FourRoomsMazeEnv(Maze())
        env = monitored(env)
        eval_env = monitored(eval_env)
        train_dataset = np.load("aux_data/fourroom_vanilla_data.npy", allow_pickle=True)[()]
        train_dataset = Dataset.create(**train_dataset)
        val_dataset = train_dataset

    if 'gridworld' in dataset_name:
        from envs.custom_mazes.darkroom import FourRoomsMazeEnv, Maze

        grid_size = dataset_name.split("-")[-1]
        env = FourRoomsMazeEnv(Maze(maze_type='gridworld', size=grid_size))
        eval_env = FourRoomsMazeEnv(Maze(maze_type='gridworld', size=grid_size))
        env = monitored(env)
        eval_env = monitored(eval_env)
        train_dataset = np.load("aux_data/gridworld_data.npy", allow_pickle=True)[()]
        train_dataset = Dataset.create(**train_dataset)
        val_dataset = train_dataset

    if 'doors-dynamics' in dataset_name:
        from envs.minigrid.doors_grid import DynamicsGeneralization_Doors, MinigridWrapper

        env = DynamicsGeneralization_Doors(render_mode="rgb_array", highlight=False, max_steps=context_len)
        eval_env = DynamicsGeneralization_Doors(render_mode="rgb_array", highlight=False, max_steps=context_len)
        # NOTE(review): only `env` is wrapped with MinigridWrapper; `eval_env`
        # stays unwrapped. Confirm this asymmetry is intended.
        env = MinigridWrapper(env)
        train_dataset = np.load(f"aux_data/doors_meta{number_of_meta_envs}_data.npy", allow_pickle=True).item()
        train_dataset = Dataset.create(**train_dataset)
        val_dataset = train_dataset

    if env is None:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(f"Unsupported dataset_name: {requested_name!r}")

    if frame_stack is not None:
        env = FrameStackWrapper(env, frame_stack)
        eval_env = FrameStackWrapper(eval_env, frame_stack)

    env.reset()
    eval_env.reset()

    if action_clip_eps is not None:
        # NOTE(review): clipping to (-1, 1) is applied to every dataset; confirm
        # callers pass action_clip_eps=None where actions are not in that range.
        clip_low, clip_high = -1 + action_clip_eps, 1 - action_clip_eps
        train_dataset = train_dataset.copy(
            add_or_replace=dict(actions=np.clip(train_dataset['actions'], clip_low, clip_high))
        )
        if val_dataset is not None:
            val_dataset = val_dataset.copy(
                add_or_replace=dict(actions=np.clip(val_dataset['actions'], clip_low, clip_high))
            )

    return env, eval_env, train_dataset, val_dataset