import numpy as np
from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHalfCheehtahEnv(HalfCheetahEnv, EzPickle):
    """
    ## Description
    Multi-objective version of the HalfCheetahEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward
    - 1: Control cost of the action

    ## Scalarization
    `step` returns a scalar reward: the dot product of the 2-dimensional
    reward vector with the weight vector `w`. The individual components are
    exposed through `info["reward_dim0"]` and `info["reward_dim1"]`.
    Use `set_weight` to change `w`; it defaults to a vector of ones.
    """

    def __init__(self, **kwargs):
        """Build the underlying environment and set up the reward metadata.

        Args:
            **kwargs: Forwarded unchanged to `HalfCheetahEnv.__init__` and
                recorded by `EzPickle`.
        """
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
        self.reward_dim = 2
        # Default weights so that `step` is usable before `set_weight` is
        # called; previously that raised AttributeError on `self.w`.
        self.w = np.ones(self.reward_dim, dtype=np.float32)

    def step(self, action):
        """Advance the simulation and return the weighted (scalarized) reward.

        Args:
            action: Action forwarded to the parent environment's `step`.

        Returns:
            The parent's `(observation, reward, terminated, truncated, info)`
            tuple, with `reward` replaced by `np.dot(vec_reward, self.w)` and
            `info` extended with `reward_dim0` / `reward_dim1`, the two
            components of the reward vector.
        """
        # The parent's own scalar reward is discarded; only the per-objective
        # terms it reports in `info` are used.
        observation, _, terminated, truncated, info = super().step(action)
        vec_reward = np.array([info["reward_run"], info["reward_ctrl"]], dtype=np.float32)

        scalar_reward = np.dot(vec_reward, self.w)
        info['reward_dim0'] = vec_reward[0]
        info['reward_dim1'] = vec_reward[1]

        return observation, scalar_reward, terminated, truncated, info

    def set_weight(self, w):
        """Set the weight vector used to scalarize the reward in `step`.

        Args:
            w: Array-like with one weight per reward dimension, i.e. shape
                `(reward_dim,)`.

        Raises:
            ValueError: If `w` does not have shape `(reward_dim,)`.
        """
        weights = np.asarray(w)
        # Reject bad shapes here rather than in `step`: a 0-d weight would
        # make np.dot return a vector instead of a scalar reward.
        if weights.shape != (self.reward_dim,):
            raise ValueError(
                f"w must have shape ({self.reward_dim},), got {weights.shape}"
            )
        self.w = weights