"""
Miscellaneous utilities for testing purposes.
"""

def make_env(n_states=2,
             n_actions=1,
             init_state_value=0.25):
    """
    Create a test environment with predictable, deterministic dynamics.

    Observations and actions are both float32 ``Box`` spaces bounded to
    [-10, 10]. For an action ``a`` the environment produces:

      reward     = max(a)
      next_state = [clip(sum(a), -10, 10)] * n_states

    Episodes never end on their own: ``step`` always reports
    ``terminated=False`` and ``truncated=False``.

    Args:
        n_states: Length of the observation vector.
        n_actions: Length of the action vector.
        init_state_value: Value of every observation entry after ``reset``.

    Returns:
        A newly constructed ``TestEnv`` instance (a ``gymnasium.Env``
        subclass) that has already been reset once.
    """
    # Function-scope imports: only resolved when make_env is actually called.
    import gymnasium as gym
    import numpy as np

    # Single source of truth for the space limits and the state clipping range.
    bound = 10.0

    class TestEnv(gym.Env):
        """Deterministic environment whose state is the clipped action sum."""

        def __init__(self):
            self.observation_space = gym.spaces.Box(
                low=-bound, high=bound, shape=(n_states,), dtype=np.float32)
            self.action_space = gym.spaces.Box(
                low=-bound, high=bound, shape=(n_actions,), dtype=np.float32)
            self._state = None
            # Start in a valid state so step() can be called right away.
            self.reset()

        def reset(self, *, seed=None, options=None):
            """Reset every state entry to ``init_state_value``.

            Returns:
                ``(observation, info)`` where ``info`` is an empty dict.
            """
            # Forward the seed so the base class seeds ``self.np_random``,
            # as the Gymnasium API expects of subclasses.
            super().reset(seed=seed)
            self._state = np.full((n_states,), init_state_value, dtype=np.float32)
            # Hand out a copy so callers cannot mutate the internal state.
            return self._state.copy(), {}

        def step(self, a):
            """Apply action ``a`` and advance the environment by one step.

            Args:
                a: Array-like of shape ``(n_actions,)``.

            Returns:
                ``(observation, reward, terminated, truncated, info)`` with
                ``terminated`` and ``truncated`` always ``False`` and
                ``info`` an empty dict.

            Raises:
                AssertionError: If ``a`` does not have shape ``(n_actions,)``.
            """
            # Accept lists/tuples as well as arrays; the dtype is left
            # untouched so the reward is computed at the caller's precision.
            a = np.asarray(a)
            assert a.shape == (n_actions,), (
                f"expected action of shape {(n_actions,)}, got {a.shape}")
            r = float(a.max())
            # Clip so the new state stays inside the observation space.
            self._state = np.full(
                (n_states,), np.clip(a.sum(), -bound, bound), dtype=np.float32)
            return self._state.copy(), r, False, False, {}

    return TestEnv()

