#!/usr/bin/env python3

from garage import wrap_experiment
from garage.envs import GymEnv
from garage.experiment.deterministic import set_seed
from garage.np.baselines import LinearFeatureBaseline
from garage.sampler import RaySampler
from garage.torch.algos import SHARP
from garage.torch.optimizers import OptimizerWrapper
from garage.torch.optimizers.SHARP_optimizer import SHARPOptimizer
from garage.torch.policies import GaussianMLPPolicy
from garage.trainer import Trainer

# Settings handed to SHARPOptimizer as its ``a`` and ``b`` keyword arguments;
# both values are also embedded in the experiment's log directory name.
a, b = 0.4, 15

def run_task(seed):
    """Define and immediately launch one SHARP training run on Hopper-v2.

    The experiment's log directory name is built from the module-level
    optimizer settings ``a`` and ``b`` together with ``seed``.

    Args:
        seed (int): Random seed for this run; forwarded to the wrapped
            experiment function and included in the log directory name.
    """
    log_dir = "/root/Data/jmlr/hopper-SHARP-a={}-b={}-seed={}".format(
        a, b, seed)

    @wrap_experiment(log_dir=log_dir, archive_launch_repo=False)
    def sharp_hopper(ctxt=None, seed=None):
        """Train a Gaussian MLP policy on Hopper-v2 with the SHARP algorithm.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration, passed straight to the Trainer.
            seed (int): Seed applied through ``set_seed`` and to the
                environment and its action space.
        """
        set_seed(seed)
        trainer = Trainer(ctxt)

        # Training budget handed to ``trainer.train`` below.
        total_epochs = 1000
        batch_size = 10000

        # Seed both the wrapped gym environment and its action space.
        env = GymEnv('Hopper-v2')
        env._env.seed(seed)
        env.action_space.seed(seed)

        policy = GaussianMLPPolicy(env.spec, hidden_sizes=[64, 64])
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        ray_sampler = RaySampler(agents=policy,
                                 envs=env,
                                 max_episode_length=500)

        # Optimizer class plus its keyword arguments, as a (class, kwargs)
        # pair wrapped around the policy.
        # Alternative tried previously:
        #     OptimizerWrapper((torch.optim.Adam, {"lr": 0.001}), policy)
        optimizer_spec = (SHARPOptimizer, {"a": a, "b": b})
        sharp_optimizer = OptimizerWrapper(optimizer_spec, policy)

        algo = SHARP(
            env_spec=env.spec,
            policy=policy,
            value_function=baseline,
            sampler=ray_sampler,
            discount=0.99,
            center_adv=False,
            policy_optimizer=sharp_optimizer,
            neural_baseline=False,
        )

        trainer.setup(algo, env)
        trainer.train(n_epochs=total_epochs, batch_size=batch_size)

    sharp_hopper(seed=seed)

# Seeds for the independent training runs, launched one after another.
# Alternative seed set: [7, 8, 21, 28, 35, 41, 16, 10, 27, 1]
seeds = [14, 33, 3, 4, 49]

for seed in seeds:
    run_task(seed)
