#!/usr/bin/env python3

from garage import wrap_experiment
from garage.envs import GymEnv
from garage.experiment.deterministic import set_seed
from garage.np.baselines import LinearFeatureBaseline
from garage.sampler import RaySampler
from garage.torch.algos import PAGEPG
from garage.torch.optimizers import OptimizerWrapper
from garage.torch.policies import GaussianMLPPolicy
from garage.trainer import Trainer
from garage.torch.optimizers.PAGEPG_optimizer import PAGEPGOptimizer
import torch

# Hyperparameters forwarded to PAGEPGOptimizer under the keys "pt" and "eta"
# and embedded in the experiment's log directory name.
# NOTE(review): presumably `pt` is the PAGE switching probability and `eta`
# the step size -- confirm against PAGEPGOptimizer.
pt = 0.4
eta = 0.01


def run_task(seed):
    """Train PAGEPG on ``Reacher-v2`` for a single random seed.

    The experiment function is re-created on every call so that the
    ``wrap_experiment`` log directory can embed the module-level ``eta`` and
    ``pt`` settings together with ``seed``.

    Args:
        seed (int): Seed passed to ``set_seed``, to the wrapped gym
            environment's ``seed()`` and to the action space's ``seed()``.
            It is also part of the log directory name.

    """
    # Same path the script has always used; built once for readability.
    log_dir = ("/root/Data/icml/final/"
               f"reacher_pagepg_eta={eta}pt={pt}seed={seed}")

    @wrap_experiment(archive_launch_repo=False, log_dir=log_dir)
    def pagepg_reacher(ctxt=None, seed=0):
        """Build the environment, policy, sampler and algorithm, then train.

        Args:
            ctxt: Experiment context forwarded to ``Trainer``; presumably
                injected by ``wrap_experiment`` -- confirm against garage.
            seed (int): Seed for the RNGs, the environment and its action
                space.

        """
        n_epochs = 1000
        sampler_batch_size = 10000

        set_seed(seed)
        env = GymEnv('Reacher-v2')
        try:
            env._env.seed(seed)
            env.action_space.seed(seed)
            trainer = Trainer(ctxt)

            policy = GaussianMLPPolicy(env.spec,
                                       hidden_sizes=[64, 64],
                                       hidden_nonlinearity=torch.tanh,
                                       output_nonlinearity=None)

            value_function = LinearFeatureBaseline(env_spec=env.spec)

            # NOTE(review): the sampler's own `seed` argument is left at its
            # default; confirm the workers pick up the experiment seed.
            sampler = RaySampler(agents=policy,
                                 envs=env,
                                 max_episode_length=50)

            policy_optimizer = OptimizerWrapper(
                (PAGEPGOptimizer, {"pt": pt, "eta": eta}),
                policy,
            )

            algo = PAGEPG(env_spec=env.spec,
                          policy=policy,
                          value_function=value_function,
                          sampler=sampler,
                          discount=0.99,
                          center_adv=False,
                          policy_optimizer=policy_optimizer,
                          neural_baseline=False,
                          )

            trainer.setup(algo, env)
            trainer.train(n_epochs=n_epochs, batch_size=sampler_batch_size)
        finally:
            # run_task is called once per seed in the same process, so the
            # simulator must be released even when training fails.
            env.close()

    pagepg_reacher(seed=seed)


# Seeds for the repeated runs; run_task embeds each one in its log directory
# name, so every run logs to a separate location.
seeds = [14, 33, 3, 4, 49, ]
# The runs are launched one after another, as soon as this file is executed.
for seed in seeds:
    run_task(seed=seed)
