import numpy as np
import argparse

from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv, ParallelPettingZooEnv
from ray.tune.registry import register_env
# PettingZoo SISL environment modules
from pettingzoo.sisl import pursuit_v4, waterworld_v3, multiwalker_v9

from gym.spaces import Discrete, Box
from ray import tune

# Random seeds; the training loop below launches one Tune run per seed.
seeds = [
    126291,
    241241516,
    10274,
]

# Environment modules to train on, keyed by a short name.
zoodic = {
    "walkers": multiwalker_v9,
}

# Per-environment base step count, keyed by the same short names as `zoodic`.
# The training loop uses it as "rollout_fragment_length" and, multiplied by
# 40, as "train_batch_size". Entries without a matching `zoodic` key
# ("pursuit", "waterworld", "knights", "hanabi") are currently not used.
paramsdic = {
    "pursuit": 500,
    "waterworld": 500,
    "knights": 1,
    "hanabi": 1,
    "walkers": 500,
}
def _shared_policy_mapping(agent_id, *args, **kwargs):
    """Map every agent to the single shared policy.

    Extra positional and keyword arguments are accepted and ignored so the
    function works whether RLlib calls ``policy_mapping_fn(agent_id)`` or
    also passes the episode / worker (as newer RLlib releases do).
    """
    return "shared_policy"


# Train PPO with one shared policy on every configured environment, once per
# seed.
for envtype, env_raw in zoodic.items():

    def env_creator(args, env_module=env_raw):
        """Return a new RLlib-wrapped instance of this iteration's environment.

        ``args`` (the env config passed by the caller) is ignored.
        ``env_module`` is bound as a default argument so the creator keeps
        using this iteration's module even if the loop variable ``env_raw``
        is rebound later.
        """
        return PettingZooEnv(env_module.env())

    # Local instance, created only to read the spaces for the policy spec.
    env = env_creator({})

    # The name "ZooEnv" is reused on every iteration, so each environment
    # type overwrites the previous registration before its runs start.
    register_env("ZooEnv", env_creator)

    obs_space = env.observation_space
    act_space = env.action_space

    # A single policy whose spec uses the wrapper's observation/action spaces.
    policies = {"shared_policy": (None, obs_space, act_space, {})}

    # Policy names; not referenced by the runs below.
    policy_ids = list(policies)

    for seed in seeds:
        ppo_config = {
            "rollout_fragment_length": paramsdic[envtype],
            "train_batch_size": paramsdic[envtype] * 40,
            "env": "ZooEnv",
            "num_workers": 14,
            "num_gpus": 1.0,
            "seed": seed,
            "model": {
                "fcnet_hiddens": [64, 64],
            },
            "framework": "torch",
            "env_config": {},
            "batch_mode": "complete_episodes",
            "observation_filter": "NoFilter",
            # NOTE(review): RLlib appears to expect a dict (or None) for
            # "logger_config"; this string is kept unchanged and is presumably
            # ignored -- confirm against the installed Ray version.
            "logger_config": "pretty_print",
            "multiagent": {
                "policies": policies,
                "policy_mapping_fn": _shared_policy_mapping,
            },
        }

        tune.run(
            "PPO",
            config=ppo_config,
            local_dir="~/ray_results",
            stop={"training_iteration": 500},
            checkpoint_at_end=True,
        )
