from garage.torch import set_gpu_mode

import torch
from garage import wrap_experiment
from garage.envs import GymEnv
from garage.experiment.deterministic import set_seed
from garage.sampler import RaySampler
from garage.torch.algos import TRPO
from garage.torch.policies import GaussianMLPPolicy
from garage.torch.value_functions import GaussianMLPValueFunction
from garage.trainer import Trainer
import numpy as np
from garage.torch.optimizers import OptimizerWrapper

from garage.torch.optimizers.SGD_optimizer import SGD
from garage.np.baselines import LinearFeatureBaseline

import argparse

# Command-line interface. Parsing happens at module level because the parsed
# seed is needed below when the experiment function is decorated and defined
# (it names the log directory and is the default for the `seed` parameter).
parser = argparse.ArgumentParser()

# --seed: integer random seed, defaults to 1.
parser.add_argument("--seed", type=int, default=1, help="random seed")

args = parser.parse_args()
# Echo the parsed options so the configuration appears in the run's output.
print(args)

@wrap_experiment(
    archive_launch_repo=False,
    log_dir="humanoid_trpo_seed={}".format(args.seed),
)
def trpo_humanoid(ctxt=None, seed=args.seed):
    """Train a Gaussian MLP policy on Humanoid-v2 with TRPO.

    Note that the log directory name is computed once, at decoration time,
    from the ``--seed`` command-line value; passing a different ``seed`` to
    this function does not change it.

    Args:
        ctxt: Experiment context forwarded to ``Trainer``. The script never
            passes it explicitly; presumably ``wrap_experiment`` injects
            it -- confirm against the garage version in use.
        seed: Value handed to ``set_seed``. Defaults to the ``--seed``
            command-line argument.
    """
    epoch_count = 1000
    batch_size = 10000

    # Seed before any environment, policy or sampler is constructed.
    set_seed(seed)

    humanoid = GymEnv('Humanoid-v2')
    runner = Trainer(ctxt)

    # Two hidden layers of 64 tanh units; no nonlinearity on the output.
    actor = GaussianMLPPolicy(
        humanoid.spec,
        hidden_sizes=[64, 64],
        hidden_nonlinearity=torch.tanh,
        output_nonlinearity=None,
    )

    # A linear feature baseline is supplied in the value-function slot.
    baseline = LinearFeatureBaseline(env_spec=humanoid.spec)

    rollout_sampler = RaySampler(
        agents=actor,
        envs=humanoid,
        max_episode_length=500,
    )

    trpo = TRPO(
        env_spec=humanoid.spec,
        policy=actor,
        value_function=baseline,
        sampler=rollout_sampler,
        discount=0.99,
        center_adv=False,
        neural_baseline=False,
    )

    runner.setup(trpo, humanoid)
    runner.train(n_epochs=epoch_count, batch_size=batch_size)

# Launch the experiment with its default arguments, so the seed is the
# --seed command-line value.
# NOTE(review): there is no `if __name__ == "__main__"` guard, so importing
# this module also starts training.
trpo_humanoid()
