import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco import MujocoEnv
from gymnasium.spaces import Box
from safety_gymnasium.tasks.safe_velocity.safety_hopper_velocity_v1 import SafetyHopperVelocityEnv
from safety_gymnasium.utils.task_utils import add_velocity_marker, clear_viewer

# Viewer camera settings; passed to MujocoEnv as `default_camera_config`.
DEFAULT_CAMERA_CONFIG = dict(
    trackbodyid=2,
    distance=3.0,
    lookat=np.array([0.0, 0.0, 1.15]),
    elevation=-20.0,
)


class SafetyHopperVelocityWindowAvgEnv(SafetyHopperVelocityEnv):
    """Hopper environment with a safety constraint on window average velocity.

    The position/velocity observation is extended with three entries that
    describe a rolling window over the most recent x-velocities:

    * the mean x-velocity over the samples currently in the window,
    * the number of samples ``k`` currently in the window (at most
      ``_window_size``),
    * the oldest x-velocity that is still inside the window.

    ``step`` reports a cost of 1.0 whenever the absolute value of the window
    mean exceeds ``_velocity_threshold`` and 0.0 otherwise.
    """

    def __init__(
        self,
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_state_range=(-100.0, 100.0),
        healthy_z_range=(0.7, float("inf")),
        healthy_angle_range=(-0.2, 0.2),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
        **kwargs,
    ):
        """Store the task parameters and build the MuJoCo simulation.

        Args:
            forward_reward_weight: Factor applied to the x-velocity in the
                reward computed by ``step``.
            ctrl_cost_weight: Stored as ``_ctrl_cost_weight``; presumably
                consumed by the inherited ``control_cost`` (not defined in
                this class).
            healthy_reward: Stored as ``_healthy_reward``; presumably consumed
                by the inherited ``healthy_reward`` property.
            terminate_when_unhealthy: Stored as ``_terminate_when_unhealthy``;
                presumably consumed by the inherited ``terminated`` property.
            healthy_state_range: Stored as ``_healthy_state_range``.
            healthy_z_range: Stored as ``_healthy_z_range``.
            healthy_angle_range: Stored as ``_healthy_angle_range``.
            reset_noise_scale: Half-width of the uniform noise added to the
                initial ``qpos``/``qvel`` in ``reset_model``.
            exclude_current_positions_from_observation: When true, the first
                ``qpos`` entry is dropped from the observation.
            **kwargs: Forwarded to ``EzPickle.__init__`` and
                ``MujocoEnv.__init__``.
        """
        utils.EzPickle.__init__(
            self,
            forward_reward_weight,
            ctrl_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_state_range,
            healthy_z_range,
            healthy_angle_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs,
        )

        self._forward_reward_weight = forward_reward_weight

        self._ctrl_cost_weight = ctrl_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy

        self._healthy_state_range = healthy_state_range
        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # Rolling-window state. Index 0 of the buffer always holds the newest
        # x-velocity. Set up before MujocoEnv.__init__ so `_get_obs` never
        # sees missing attributes, even if base-class construction were to
        # request an observation.
        self._window_size = 25
        self._last_n_x_velocity = np.zeros(self._window_size)
        self._last_timestep = 0

        # The shapes include the 3 window features that `_get_obs` appends
        # after the position/velocity entries (11 + 3 and 12 + 3).
        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(14,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(15,), dtype=np.float64
            )

        MujocoEnv.__init__(
            self,
            "hopper.xml",
            4,
            observation_space=observation_space,
            default_camera_config=DEFAULT_CAMERA_CONFIG,
            **kwargs,
        )

        # Threshold that `step` compares against the absolute window mean.
        self._velocity_threshold = 0.7402
        self.model.light(0).castshadow = False

    def _get_obs(self):
        """Return the current observation with the window features appended.

        Returns:
            A 1-D array made of ``qpos`` (without its first entry when
            ``_exclude_current_positions_from_observation`` is set), ``qvel``
            clipped to ``[-10, 10]``, and ``[window_mean, k, oldest_sample]``.
            The three window entries are all zero while no step has been
            recorded since the last reset.
        """
        position = self.data.qpos.flat.copy()
        velocity = np.clip(self.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            position = position[1:]

        aug_obs = np.zeros(3)

        # Number of valid samples in the buffer. Kept as a Python int because
        # it is used as an array index below; reading it back from the float
        # array `aug_obs` would make NumPy raise IndexError.
        k = min(self._last_timestep, self._window_size)
        if k > 0:
            # The newest sample sits at index 0, so the valid samples are
            # exactly the first k entries of the buffer.
            aug_obs[0] = self._last_n_x_velocity[:k].mean()  # SMA_prev
            aug_obs[1] = k
            aug_obs[2] = self._last_n_x_velocity[k - 1]  # p_{n-k+1} (oldest)
            # p_{n+1} is deliberately not included: the next x-velocity is
            # supposed to be inferred from the rest of the observation.

        observation = np.concatenate((position, velocity, aug_obs)).ravel()
        return observation

    def step(self, action):
        """Advance the simulation by one step and evaluate reward and cost.

        Args:
            action: Control input passed to ``do_simulation`` and
                ``control_cost``.

        Returns:
            A tuple ``(observation, reward, cost, terminated, truncated,
            info)`` where ``truncated`` is always ``False``, ``cost`` is 1.0
            if the absolute window-average x-velocity exceeds
            ``_velocity_threshold`` (0.0 otherwise), and ``info`` carries
            ``'x_position'`` and ``'x_velocity'``.
        """
        x_position_before = self.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.data.qpos[0]
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        # Push the newest velocity to the front and drop the oldest entry so
        # the buffer length stays at `_window_size`.
        self._last_n_x_velocity = np.append(x_velocity, self._last_n_x_velocity[:-1])
        self._last_timestep += 1

        observation = self._get_obs()
        reward = rewards - costs
        terminated = self.terminated
        info = {
            'x_position': x_position_after,
            'x_velocity': x_velocity,
        }

        # observation[-3] is the window mean written by `_get_obs`; the cost
        # is based on it rather than on the instantaneous velocity.
        cost = float(np.abs(observation[-3]) > self._velocity_threshold)

        if self.mujoco_renderer.viewer:
            clear_viewer(self.mujoco_renderer.viewer)
            add_velocity_marker(
                viewer=self.mujoco_renderer.viewer,
                pos=self.get_body_com('torso')[:3].copy(),
                vel=x_velocity,
                cost=cost,
                velocity_threshold=self._velocity_threshold,
            )
        if self.render_mode == 'human':
            self.render()
        return observation, reward, cost, terminated, False, info

    def reset_model(self):
        """Reset the simulation state and clear the rolling velocity window.

        ``qpos`` and ``qvel`` are set to their initial values plus uniform
        noise drawn from ``[-_reset_noise_scale, _reset_noise_scale]``.

        Returns:
            The observation of the freshly reset state; its three window
            entries are zero because the window is empty.
        """
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        # Start the new episode with an empty window.
        self._last_n_x_velocity = np.zeros(self._window_size)
        self._last_timestep = 0

        observation = self._get_obs()
        return observation
