import numpy as np

from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv, ParallelPettingZooEnv
from ray.tune.registry import register_env
from supersuit import flatten_v0

#envs
from pettingzoo.sisl import pursuit_v4, waterworld_v3, multiwalker_v9

from gym.spaces import Discrete, Box
from ray import tune

# Directory handed to tune.run() as local_dir for results and checkpoints.
logpath = "~/ray_results"

# One full training run is launched per seed for every environment.
seeds = [
    126291,
    241241516,
    10274,
]

# Environment name -> PettingZoo module exposing an ``env()`` constructor.
zoodic = dict(pursuit=pursuit_v4)

# Environment name -> rollout fragment length; the train batch size is
# derived from this value in the training loop.
paramsdic = dict(
    pursuit=500,
    walkers=500,
)
def shared_policy_mapping(agent_id, *args, **kwargs):
    """Map every agent to the single shared policy.

    Only ``agent_id`` is required, matching the original one-argument mapping
    function. Extra positional/keyword arguments are accepted and ignored so
    the function also works with RLlib versions that call the mapping
    function with additional episode/worker arguments.
    """
    return "shared_policy"


# Train APEX with one policy shared by all agents: once per environment in
# ``zoodic`` and, for each environment, once per seed in ``seeds``.
for envtype, env_raw in zoodic.items():

    def env_creator(args, env_module=env_raw):
        """Build a flattened, RLlib-wrapped instance of this environment.

        ``env_module`` is bound as a default argument so the creator keeps
        referring to this iteration's environment module instead of looking
        up the loop variable at call time. ``args`` (the env config passed
        by RLlib) is accepted to satisfy the creator signature but is not
        used.
        """
        return PettingZooEnv(flatten_v0(env_module.env()))

    register_env("ZooEnv", env_creator)

    # A local instance is needed only to read the observation/action spaces
    # for the policy spec; close it once they have been captured.
    env = env_creator({})
    obs_space = env.observation_space
    act_space = env.action_space
    env.close()

    policies = {"shared_policy": (None, obs_space, act_space, {})}

    fragment_length = paramsdic[envtype]

    for seed in seeds:
        tune.run(
            "APEX",
            config={
                #"mixer": "qmix",
                "rollout_fragment_length": fragment_length,
                # Each train batch holds 40 rollout fragments' worth of steps.
                "train_batch_size": fragment_length * 40,
                "env": "ZooEnv",
                "num_workers": 10,
                "num_gpus": 1.0,
                "seed": seed,
                "model": {
                    "fcnet_hiddens": [64, 64],
                },
                "framework": "torch",
                "env_config": {},
                #"batch_mode": "complete_episodes",
                "observation_filter": "NoFilter",
                # NOTE(review): RLlib documents ``logger_config`` as a dict
                # (or None); confirm that the string "pretty_print" has the
                # intended effect. Kept unchanged to preserve behavior.
                "logger_config": "pretty_print",
                "multiagent": {
                    "policies": policies,
                    "policy_mapping_fn": shared_policy_mapping,
                },
            },
            local_dir=logpath,
            stop={"training_iteration": 200},
            checkpoint_at_end=True,
        )
