# Test the PointMaze environment
import numpy as np
import gymnasium as gym


# Sample rollouts from the environment
# Custom maze layout handed to the environment through ``maze_map``.
# Cell legend (per the Gymnasium-Robotics maze docs -- confirm against the
# installed version): 1 = wall, 0 = free cell, "r" = candidate reset/start
# cell, "g" = candidate goal cell.
example_map = [
    [1, 1, 1, 1, 1, 1],
    [1, "r", 0, 0, 1, 1],
    [1, 0, 0, 0, 0, 1],
    [1, 1, 0, 1, 0, 1],
    [1, 0, 0, 0, "g", 1],
    [1, 1, 1, "g", "g", 1],
    [1, 1, 1, 1, 1, 1],
]

# Build the point-maze environment on the custom layout. ``max_episode_steps``
# caps the episode length: once it is reached, ``step`` reports
# ``truncated=True``.
# NOTE(review): recent gymnasium-robotics releases need the package imported
# and registered before ``gym.make`` can resolve this ID -- confirm for the
# installed version.
env = gym.make('PointMaze_Medium_Diverse_G-v3', maze_map=example_map, max_episode_steps=1000)

try:
    # reset() returns (observation, info); only the observation is used here.
    # The maze structure itself comes from ``maze_map``; the seed only makes
    # the environment's own randomness at reset reproducible.
    # NOTE(review): the action space has its own RNG, so the sampled actions
    # below are NOT covered by this seed -- call ``env.action_space.seed(...)``
    # if fully reproducible rollouts are wanted.
    curr_obs = env.reset(seed=0)[0]

    # The observation is a dict: the raw state plus the goal and the current
    # (achieved) position.
    state = curr_obs['observation']
    final_pos = curr_obs['desired_goal']
    curr_pos = curr_obs['achieved_goal']
    reward_tot = 0
    for i in range(1000):

        # Random policy: sample an action from the action space
        action = env.action_space.sample()
        # Actual real transition. step() returns
        # (obs, reward, terminated, truncated, info); both end-of-episode
        # flags must be checked, not just the first one.
        curr_obs, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        reward_tot += reward

        # Get next state and current position
        next_state = curr_obs['observation']
        curr_pos = curr_obs['achieved_goal']

        if reward_tot > 0:
            print('Reward: ', reward_tot)
            print('Current position: ', curr_pos)

        # Stepping an environment after its episode has ended is undefined
        # behaviour in Gymnasium, so stop the rollout here.
        if done:
            break

        # Advance the transition so ``state`` always holds the latest
        # observation instead of staying stuck at the initial one.
        state = next_state
finally:
    # Release simulator resources even if the rollout raised.
    env.close()
