from __future__ import annotations

import numpy as np


class TimeLimit:
    """Wrap an environment and flag truncation once a step budget is used up.

    The wrapped ``env`` must provide ``reset(**kwargs)`` and ``step(action)``
    where ``step`` returns a 5-tuple that is unpacked here as
    ``(obs, rew, term, trunc, info)``.

    Attributes that this wrapper does not define itself (for example
    ``action_space``) are looked up on the wrapped environment, so other
    wrappers that inspect the environment can be stacked on top of this one.
    """

    def __init__(self, env, max_steps: int):
        """Store the environment and the per-episode step budget.

        Args:
            env: The environment to wrap.
            max_steps: Number of steps after which ``step`` reports
                ``trunc`` as ``True``.
        """
        self.env = env
        self.max_steps = max_steps
        # Steps taken since the last reset().
        self._t = 0

    def __getattr__(self, name):
        """Forward unknown public attributes to the wrapped environment.

        Python calls this only when normal attribute lookup fails, so the
        wrapper's own attributes and methods always take precedence.

        Raises:
            AttributeError: For ``env`` itself and for underscore-prefixed
                names. These are never forwarded, which avoids infinite
                recursion when ``self.env`` has not been assigned yet (e.g.
                during copying or unpickling) and keeps private state private.
        """
        if name == "env" or name.startswith("_"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self.env, name)

    def reset(self, **kwargs):
        """Restart the step counter and reset the wrapped environment.

        All keyword arguments are passed through unchanged, and the wrapped
        environment's return value is returned as-is.
        """
        self._t = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped environment and apply the step budget.

        Returns:
            The wrapped environment's ``(obs, rew, term, trunc, info)`` tuple,
            with ``trunc`` replaced by ``True`` once at least ``max_steps``
            steps have been taken since the last ``reset``.
        """
        obs, rew, term, trunc, info = self.env.step(action)
        self._t += 1
        # Only ever force truncation on; a True reported by the inner
        # environment before the budget is reached is passed through.
        if self._t >= self.max_steps:
            trunc = True
        return obs, rew, term, trunc, info


class ActionNormalize:
    """Let callers act in [-1, 1] and rescale to the environment's bounds.

    The bounds are read once, at construction time, from
    ``env.action_space.low`` and ``env.action_space.high``.
    """

    def __init__(self, env):
        """Wrap ``env`` and remember the bounds of its action space."""
        self.env = env
        space = env.action_space
        self.low = space.low
        self.high = space.high

    def reset(self, **kwargs):
        """Reset the wrapped environment, passing all keyword arguments on."""
        result = self.env.reset(**kwargs)
        return result

    def step(self, action):
        """Clip ``action`` to [-1, 1], rescale it, and step the environment.

        The clipped action is mapped linearly so that -1 corresponds to
        ``low`` and +1 corresponds to ``high``. The wrapped environment's
        ``step`` result is returned unchanged.
        """
        clipped = np.clip(action, -1.0, 1.0)
        # Shift [-1, 1] onto [0, 1] to get the fraction of the range to cover.
        fraction = (clipped + 1.0) * 0.5
        span = self.high - self.low
        rescaled = self.low + fraction * span
        return self.env.step(rescaled)

