import os
import gym
import argparse
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3 import SAC
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure

# Note: pybullet is not yet compatible with Gymnasium.
# For pybullet environments you might need to use `import rl_zoo3.gym_patches`
# and use gym (not Gymnasium) to instantiate the env.
# Alternatively, you can use the MuJoCo equivalent, e.g. "HalfCheetah-v4".
# Command-line options for a single training run.
#
# Each option takes exactly one value. The previous `nargs='?'` (without a
# `const`) made the value optional, so a bare flag such as `--seed` parsed
# successfully but produced None, which is not a usable value for any of
# these options (e.g. None cannot be concatenated into the log path).
# Requiring the value turns that mistake into an immediate usage error.
parser = argparse.ArgumentParser(
    description="Train SAC on a Gym environment with periodic evaluation."
)
parser.add_argument(
    "--seed",
    type=int,
    default=1,
    help="Random seed used for the environments and the model.",
)
parser.add_argument(
    "--device",
    type=str,
    default="cuda:0",
    help="Device string passed to the SAC model.",
)
parser.add_argument(
    "--folder",
    type=str,
    default="/home/",
    help="Base directory under which the log folder is created.",
)
parser.add_argument(
    "--env",
    type=str,
    default="Hopper-v3",
    help="Environment id passed to gym.make.",
)
args = parser.parse_args()

# Per-seed output directory for logs, evaluation results and the best model.
# Built with os.path.join so that `--folder` works with or without a trailing
# separator (plain concatenation turned "/data" into "/datasource/...").
# The final empty component keeps the trailing separator the original string
# had, so the default configuration yields exactly the same path.
log_folder = os.path.join(
    args.folder,
    "source",
    "stable-baselines3-1.7.0",
    "logs",
    "source",
    f"seed{args.seed}",
    "",
)

# Single-environment vectorized wrappers: one for training, and a separate
# Monitor-wrapped one used only by the evaluation callback.
vec_env = DummyVecEnv([lambda: gym.make(args.env)])
eval_env = DummyVecEnv([lambda: Monitor(gym.make(args.env))])

# Seed both environments and the global RNGs with the same seed.
vec_env.seed(seed=args.seed)
eval_env.seed(seed=args.seed)
set_random_seed(seed=args.seed)

# Log to stdout, CSV and TensorBoard inside the per-seed folder.
new_logger = configure(log_folder, ["stdout", "csv", "tensorboard"])

# Evaluate deterministically for 5 episodes every 2000 callback steps;
# the best model and the evaluation log are written to log_folder.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=log_folder,
    log_path=log_folder,
    eval_freq=2000,
    deterministic=True,
    render=False,
    n_eval_episodes=5,
)

# NOTE: no VecNormalize wrapper is applied to either environment here, so
# observations and rewards reach the model un-normalized.
model = SAC(
    "MlpPolicy",
    vec_env,
    verbose=1,
    device=args.device,
    seed=args.seed,
)
model.set_logger(new_logger)
model.learn(total_timesteps=1000000, progress_bar=True, callback=eval_callback)
