from gym import register
from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
import numpy as np


class InvertedPendulumEnv4(InvertedPendulumEnv):
    """Inverted-pendulum variant with float32 observations and its own done rule.

    The parent's termination flag is ignored; ``step`` recomputes it from the
    returned observation instead.
    """

    def _get_obs(self):
        """Return the parent's observation cast to ``np.float32``."""
        observation = super()._get_obs()
        return observation.astype(np.float32)

    def step(self, a):
        """Advance the simulation by one step using action ``a``.

        Returns:
            A 4-tuple ``(observation, reward, done, info)``. Observation,
            reward and info are forwarded unchanged from the parent; ``done``
            is True when the observation holds a non-finite value or when
            ``abs(observation[1])`` exceeds 0.8.
        """
        # The parent's own done flag is deliberately discarded.
        obs, rew, _, extras = super().step(a)

        # A NaN/inf anywhere in the observation ends the episode immediately.
        if not np.isfinite(obs).all():
            return obs, rew, True, extras

        # NOTE(review): obs[1] is presumably the pole angle -- confirm against
        # the parent's observation layout.
        out_of_bounds = not (np.abs(obs[1]) <= .8)
        return obs, rew, out_of_bounds, extras


# Make the customised environment available under the id 'InvertedPendulum-v4'.
register(
    id='InvertedPendulum-v4',
    entry_point=InvertedPendulumEnv4,
    max_episode_steps=1000,
)
