#!/usr/bin/env python3

from garage import wrap_experiment
from garage.envs import GymEnv
from garage.experiment.deterministic import set_seed
from garage.np.baselines import LinearFeatureBaseline
from garage.sampler import RaySampler
from garage.torch.algos import SHARP
from garage.torch.optimizers import OptimizerWrapper
from garage.torch.optimizers.SHARP_optimizer import SHARPOptimizer
from garage.torch.policies import GaussianMLPPolicy
from garage.trainer import Trainer

# Optimizer settings shared by every run: `run_task` embeds both values in the
# log directory name and passes them to the policy optimizer under the keys
# "a" and "b".
a, b = 0.1, 1.5


def run_task(seed):
    """Launch one SHARP training experiment on Reacher-v2.

    The experiment logs to a directory whose name encodes the module-level
    ``a`` and ``b`` values together with ``seed``.

    Args:
        seed (int): Seed forwarded to the wrapped experiment function.
    """
    log_dir = "/root/Data/jmlr/reacher-SHARP-a={}-b={}-seed={}".format(
        a, b, seed)

    @wrap_experiment(log_dir=log_dir, archive_launch_repo=False)
    def sharp_reacher(ctxt=None, seed=None):
        """Train a Gaussian MLP policy on Reacher-v2 with the SHARP algorithm.

        Args:
            ctxt (garage.experiment.ExperimentContext): The experiment
                configuration handed to the Trainer.
            seed (int): Used to seed the random number generator to produce
                determinism.
        """
        set_seed(seed)
        trainer = Trainer(ctxt)

        env = GymEnv('Reacher-v2')
        # Seed the environment held in `_env` (presumably the wrapped gym
        # environment -- confirm against GymEnv) and the action space, in
        # addition to the global seeding done by set_seed above.
        env._env.seed(seed)
        env.action_space.seed(seed)
        spec = env.spec

        policy = GaussianMLPPolicy(spec, hidden_sizes=[64, 64])
        baseline = LinearFeatureBaseline(env_spec=spec)
        ray_sampler = RaySampler(
            agents=policy,
            envs=env,
            max_episode_length=spec.max_episode_length,
        )

        # Optimizer class plus its settings, taken from the module-level
        # `a` and `b` values.
        optimizer_spec = (SHARPOptimizer, {"a": a, "b": b})
        wrapped_optimizer = OptimizerWrapper(optimizer_spec, policy)

        sharp = SHARP(
            env_spec=spec,
            policy=policy,
            value_function=baseline,
            sampler=ray_sampler,
            policy_optimizer=wrapped_optimizer,
            discount=0.99,
            center_adv=False,
            neural_baseline=False,
        )

        trainer.setup(sharp, env)
        trainer.train(n_epochs=10000, batch_size=1000)

    sharp_reacher(seed=seed)

# Alternative seed set (currently disabled):
# seeds = [7, 8, 21, 28, 35, 41, 16, 10, 27, 1]

# Seeds for the runs launched by this script, one experiment per seed.
seeds = [14, 33, 3, 4, 49]

# Only launch training when executed as a script, so that importing this
# module does not start the (long-running) experiments as a side effect.
if __name__ == "__main__":
    for seed in seeds:
        run_task(seed=seed)
