import gym
import numpy as np
from gym.envs.mujoco import mujoco_env

class MJCostWrapper(gym.Wrapper):
    """Wrap a MuJoCo environment so ``step`` returns a two-component reward.

    ``step`` returns the reward as the list ``[reward_forward, reward_circle]``:
    the wrapped environment's own reward followed by a "circle" term computed
    from the first two entries of ``sim.data.qpos`` and ``sim.data.qvel``
    (which this class treats as planar position and velocity).

    Args:
        env: Environment to wrap. It must expose a ``sim`` attribute.
        radius: Distance from the origin at which the circle term is not
            attenuated (see ``step``).

    Raises:
        AttributeError: If ``env`` has no ``sim`` attribute.
    """

    def __init__(self, env: mujoco_env.MujocoEnv, radius=1):
        super().__init__(env)
        # Validate with an explicit raise rather than ``assert``: assertions
        # are removed under ``python -O``, which would defer the failure to
        # the first ``step`` call (an AttributeError on ``self.env.sim``).
        if not hasattr(env, "sim"):
            raise AttributeError(
                "MJCostWrapper requires an environment exposing a 'sim' attribute"
            )
        self.env = env
        self.radius = radius

    def step(self, action):
        """Step the wrapped environment and append the circle reward.

        Args:
            action: Action forwarded to the wrapped environment after being
                passed through ``np.nan_to_num`` (NaN/inf entries are replaced
                with finite numbers).

        Returns:
            The tuple ``(obs, [reward_forward, reward_circle], done, info)``
            where ``obs``, ``done`` and ``info`` come unchanged from the
            wrapped environment, ``reward_forward`` is its reward, and
            ``reward_circle`` is::

                (x * dy - y * dx) / (1 + |sqrt(x**2 + y**2) - radius|)
        """
        action = np.nan_to_num(action)
        obs, reward, done, info = self.env.step(action)

        reward_forward = reward

        # State is read after the step, so it reflects the post-step simulation.
        pos = self.env.sim.data.qpos[0:2]
        vel = self.env.sim.data.qvel[0:2]
        x, y = pos[0], pos[1]
        dx, dy = vel[0], vel[1]

        # Numerator: z-component of (position x velocity); positive when the
        # motion is counter-clockwise about the origin.
        # Denominator: equals 1 exactly at distance == radius and grows with
        # the deviation from it, shrinking the term away from that circle.
        distance = np.sqrt(x ** 2 + y ** 2)
        reward_circle = (-y * dx + x * dy) / (1 + np.abs(distance - self.radius))

        return obs, [reward_forward, reward_circle], done, info
    

