import time

import gymnasium as gym
import numpy as np

from jax_rl.wrappers.common import TimeStep


class ManipEpisodeMonitor(gym.ActionWrapper):
    """Track per-episode statistics and publish them through ``info``.

    On every step the wrapper adds ``info['total'] = {'timesteps': N}``,
    where ``N`` counts steps taken over the wrapper's lifetime (it is not
    cleared by ``reset``).

    On the step where the episode ends (``terminated or truncated``) it also
    adds ``info['episode']`` with:

    * ``return``: sum of rewards since the last reset; replaced by
      ``get_normalized_score(return) * 100.0`` when a
      ``get_normalized_score`` attribute is reachable on this wrapper.
    * ``length``: number of steps since the last reset.
    * ``duration``: wall-clock seconds since the last reset.
    * ``is_success``: running sum of ``info['is_success']`` over the
      episode's steps (steps without that key contribute 0).
    * ``final_pos_dist`` / ``final_rot_dist``: the last step's
      ``info['pos_distance']`` / ``info['rot_distance']``, included only
      when the wrapped environment reports them.

    NOTE(review): the base class is ``gym.ActionWrapper`` although ``step``
    is overridden wholesale and no ``action()`` transform is defined; a plain
    ``gym.Wrapper`` looks sufficient -- confirm before changing the base.
    """

    # (key in the env's step info, key written into info['episode']).
    _FINAL_DISTANCE_KEYS = (
        ('pos_distance', 'final_pos_dist'),
        ('rot_distance', 'final_rot_dist'),
    )

    def __init__(self, env: gym.Env):
        """Wrap ``env`` and start with cleared statistics."""
        super().__init__(env)
        self._reset_stats()
        self.total_timesteps = 0

    def _reset_stats(self):
        """Clear the per-episode accumulators and restart the episode clock."""
        self.reward_sum = 0.0
        self.episode_length = 0
        self.start_time = time.time()
        self.is_success = 0

    def step(self, action: np.ndarray) -> TimeStep:
        """Step the wrapped env, update the statistics and annotate ``info``.

        Args:
            action: Action forwarded unchanged to the wrapped environment.

        Returns:
            The ``(observation, reward, terminated, truncated, info)`` tuple
            from the wrapped environment, with ``info`` augmented in place as
            described in the class docstring.
        """
        observation, reward, terminated, truncated, info = self.env.step(action)

        self.reward_sum += reward
        self.episode_length += 1
        self.total_timesteps += 1
        self.is_success += info.get('is_success', 0)
        info['total'] = {'timesteps': self.total_timesteps}

        if terminated or truncated:
            episode = {
                'return': self.reward_sum,
                'length': self.episode_length,
                'duration': time.time() - self.start_time,
                'is_success': self.is_success,
            }
            # The distance metrics are optional: an env that does not report
            # them must not make the final step of an episode raise KeyError.
            for source_key, episode_key in self._FINAL_DISTANCE_KEYS:
                if source_key in info:
                    episode[episode_key] = info[source_key]

            if hasattr(self, 'get_normalized_score'):
                # Report the normalised score scaled by 100 instead of the
                # raw return.
                episode['return'] = self.get_normalized_score(
                    episode['return']) * 100.0

            info['episode'] = episode

        return observation, reward, terminated, truncated, info

    def reset(self, seed=None, options=None) -> tuple:
        """Clear the per-episode statistics and reset the wrapped env.

        Args:
            seed: Forwarded to the wrapped environment's ``reset``.
            options: Forwarded to the wrapped environment's ``reset``.

        Returns:
            Whatever the wrapped environment's ``reset`` returns (under the
            Gymnasium API, an ``(observation, info)`` tuple).
        """
        # Stats are cleared before the env resets, so the reported episode
        # duration includes the time spent inside the env's reset.
        self._reset_stats()
        return self.env.reset(seed=seed, options=options)