import numpy as np

from hand_env_utils.teleop_env import create_relocate_env
from hand_teleop.real_world.task_setting import IMG_CONFIG
from stable_baselines3.ppo import PPO

def main():
    """Replay a saved PPO checkpoint in the relocate environment with a GUI.

    Builds the relocate environment for a fixed object, optionally enables
    the imagination config, loads the PPO policy from ``checkpoint_path`` and
    then runs deterministic rollouts back to back until the viewer window is
    closed. The accumulated reward of each rollout is printed when it ends.
    """
    checkpoint_path = "checkpoint/ppo_img/model/model_600.zip"
    use_visual_obs = True
    object_name = "mustard_bottle"
    device = "cuda:0"

    env = create_relocate_env(object_name, use_visual_obs=use_visual_obs, use_gui=True)

    # Checkpoints whose path contains "img" get the "relocate_robot_only"
    # imagination config applied before loading.
    # NOTE(review): presumably the env must match the observation layout the
    # checkpoint was trained with -- confirm against the training script.
    if use_visual_obs and "img" in checkpoint_path:
        env.setup_imagination_config(IMG_CONFIG["relocate_robot_only"])

    policy = PPO.load(checkpoint_path, env=env, device=device)

    print(env.observation_space)
    viewer = env.render(mode="human")

    while not viewer.closed:
        episode_return = 0
        obs = env.reset()
        for _ in range(env.horizon):
            # predict() returns (action, state); only the action is needed.
            action, _state = policy.predict(observation=obs, deterministic=True)
            obs, reward, done, _info = env.step(action)
            episode_return += reward
            env.render()
            # Stop stepping once the episode reports termination (stepping
            # past `done` is undefined under the Gym API) or as soon as the
            # user closes the viewer, instead of running out the horizon.
            if done or viewer.closed:
                break
        print(f"Episode return: {episode_return}")


if __name__ == '__main__':
    main()