import os
import gym
import argparse
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3 import SAC
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
import time
import torch
import numpy as np


# Note: PyBullet is not yet compatible with Gymnasium.
# You might need to `import rl_zoo3.gym_patches`
# and use gym (not Gymnasium) to instantiate the env.
# Alternatively, you can use the MuJoCo equivalent, "HalfCheetah-v4".
# Command-line options for the evaluation run.
# Each flag requires exactly one value. Previously the flags used nargs='?',
# so a bare flag such as `--folder` (no value) silently produced None and the
# script failed later with an unrelated TypeError; argparse now rejects that
# up front with a usage message. Invocations that pass values, or omit the
# flags entirely, behave exactly as before.
parser = argparse.ArgumentParser(
    description="Evaluate a saved SAC model for one deterministic episode.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    "--seed",
    type=int,
    default=1,
    help="random seed; also selects the seed<N> log folder the model is loaded from",
)
parser.add_argument(
    "--folder",
    type=str,
    default='/home/',
    help="root folder containing assets/ and target_domain/logs/",
)
parser.add_argument(
    "--xml_file",
    type=str,
    default='hopper_target.xml',
    help="XML file name under <folder>/assets, passed to the env as xml_file",
)
parser.add_argument(
    "--env",
    type=str,
    default='Hopper-v3',
    help="gym environment id to instantiate",
)
args = parser.parse_args()


# Build a single-environment vectorized wrapper around the requested env.
# os.path.join keeps the asset path valid whether or not --folder ends with a
# path separator; plain string concatenation produced e.g. "/homeassets/..."
# when the trailing slash was omitted.
xml_path = os.path.join(args.folder, "assets", args.xml_file)
vec_env = DummyVecEnv([lambda: gym.make(args.env, xml_file=xml_path)])

# Seed the environment and the global random generators with the same value.
vec_env.seed(seed=args.seed)
set_random_seed(seed=args.seed)

# NOTE(review): an earlier comment here promised automatic normalization of
# inputs and rewards, but no VecNormalize wrapper is applied to vec_env.
# Confirm the saved model was trained on unnormalized observations.

# Load the "best_model" checkpoint stored under this seed's log folder.
model_path = os.path.join(
    args.folder, "target_domain", "logs", "bellman", f"seed{args.seed}", "best_model"
)
model = SAC.load(model_path)

# Roll out one episode with the deterministic policy and print its return.
MAX_STEPS = 100000  # hard cap on environment steps so the script always exits

obs = vec_env.reset()
episode_reward = 0

for _step in range(MAX_STEPS):
    # predict() also returns a policy state, which is not needed here; it gets
    # its own name so it no longer overwrites the loop variable.
    action, _state = model.predict(obs, deterministic=True)
    # A vectorized env returns per-env arrays from step(); with the single env
    # used here, `done` holds one flag and `reward` one value, which is why the
    # accumulated reward prints as a one-element array.
    obs, reward, done, info = vec_env.step(action)
    episode_reward += reward
    if done:
        print("Reward:", episode_reward)
        break
else:
    # The step budget ran out before the env reported the end of the episode.
    # Previously the script exited silently in this case.
    print(
        f"Episode did not finish within {MAX_STEPS} steps; partial reward:",
        episode_reward,
    )