import gymnasium as gym

# Environments are already sparse (reward is 0, or 1 once the goal is reached)

# Custom maze layout for the Point Maze environment:
# 'r' is a reset cell, 'g' is a goal cell, 0 is a free cell and 1 is a wall
example_map = [[1, 1, 1, 1, 1],
               [1, 'r', 0, 'g', 1],
               [1, 1, 1, 0, 1],
               [1, 0, 0, 0, 1],
               [1, 1, 1, 1, 1]]


def make_env(use_example_map=False):
    """Create the Point Maze environment to explore.

    Args:
        use_example_map: when True, build 'PointMaze_UMaze-v3' with the custom
            ``example_map`` layout; otherwise build
            'PointMaze_Medium_Diverse_G-v3', a maze with 3 goals and 1 reset
            cell.

    Returns:
        The environment returned by ``gym.make``.
    """
    # NOTE(review): the PointMaze ids come from the gymnasium-robotics package;
    # depending on the installed Gymnasium version that package may have to be
    # imported/registered before gym.make can resolve them -- confirm.
    if use_example_map:
        return gym.make('PointMaze_UMaze-v3', maze_map=example_map)
    return gym.make('PointMaze_Medium_Diverse_G-v3')


def _axis_to_index(value, bounds, num_cells):
    """Map one coordinate inside ``bounds`` to a cell index in [0, num_cells)."""
    low, high = bounds
    span = high - low
    # Rejects empty, inverted, NaN and infinite ranges in a single comparison.
    if not 0 < span < float('inf'):
        raise ValueError(f'bounds must be finite with low < high, got {bounds!r}')
    if num_cells <= 0:
        raise ValueError(f'num_cells must be positive, got {num_cells!r}')
    index = int((value - low) / span * num_cells)
    # Clamp so that value == high (or a slight overshoot) stays in the grid.
    return min(max(index, 0), num_cells - 1)


def position_to_cell(x, y, x_bounds, y_bounds, num_x_cells, num_y_cells):
    """Discretize a continuous (x, y) position into a grid cell.

    Args:
        x, y: continuous coordinates of the agent.
        x_bounds, y_bounds: ``(low, high)`` finite extents of the grid along
            each axis.
        num_x_cells, num_y_cells: number of cells along each axis.

    Returns:
        ``(x_cell, y_cell)`` integer indices, clamped to the grid.

    Raises:
        ValueError: if the bounds are not finite with ``low < high`` or a cell
            count is not positive.
    """
    return (_axis_to_index(x, x_bounds, num_x_cells),
            _axis_to_index(y, y_bounds, num_y_cells))


def main(num_episodes=1000):
    """Explore the maze with random actions and print the cell coverage.

    Args:
        num_episodes: number of episodes to run (adjust as needed).
    """
    env = make_env()
    try:
        # The observation space of the environment cannot be used to derive
        # grid bounds when its limits are infinite, so the grid is taken from
        # the maze geometry instead.
        # NOTE(review): relies on the Maze attributes exposed by
        # gymnasium-robotics (map_width, map_length, x_map_center,
        # y_map_center, maze_map) -- confirm against the installed version.
        maze = env.unwrapped.maze
        num_x_cells = maze.map_width
        num_y_cells = maze.map_length
        x_bounds = (-maze.x_map_center, maze.x_map_center)
        y_bounds = (-maze.y_map_center, maze.y_map_center)

        # Walls can never be visited, so only non-wall cells count as states.
        total_cells = sum(cell != 1 for row in maze.maze_map for cell in row)

        visited_states = set()

        def mark_visited(observation):
            x, y = observation['achieved_goal'][:2]
            visited_states.add(position_to_cell(
                x, y, x_bounds, y_bounds, num_x_cells, num_y_cells))

        # Run episodes to explore the environment
        for episode in range(num_episodes):
            # Seed only the first reset: re-seeding every episode would replay
            # the same start/goal sampling each time.
            obs, _info = env.reset(seed=0 if episode == 0 else None)

            # Print initial position and goal position
            print('\n\nInitial position: ', obs['achieved_goal'])
            print('Goal position: ', obs['desired_goal'], '\n\n')

            mark_visited(obs)
            done = False
            while not done:
                # Replace with your RL agent's action selection
                action = env.action_space.sample()
                obs, _reward, terminated, truncated, _info = env.step(action)
                mark_visited(obs)
                # An episode ends on success (terminated) or on the time limit
                # (truncated); ignoring truncation could loop forever.
                done = terminated or truncated
    finally:
        env.close()

    # Calculate the percentage of visited states
    percentage_visited_states = len(visited_states) / total_cells * 100

    print("Percentage of Progressively Visited States:", percentage_visited_states)


if __name__ == '__main__':
    main()
