import numpy as np
import argparse

from longroad.envs import IntegerRoadEnv,ZooIntegerRoadEnv
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv, ParallelPettingZooEnv
from ray.tune.registry import register_env



from gym.spaces import Discrete, Box
from ray import tune



# Command-line options for the experiment. The switches --yellow and --seeds
# are integers that are interpreted as booleans (0 = off, non-zero = on).
parser = argparse.ArgumentParser(description='Trail Settings')
parser.add_argument(
    '--agentsize',
    type=int,
    default=10,
    help="agentsize")
parser.add_argument(
    '--yellow',
    type=int,
    default=1,
    help="yellow phase: 0 disables it, any non-zero integer enables it")
parser.add_argument(
    '--seeds',
    type=int,
    default=1,
    help="non-zero: run once for each of the three built-in seeds; "
         "0: run with a single seed")
parser.add_argument(
    '--it',
    type=int,
    default=500,
    help="iteration option")
parser.add_argument(
    '--path',
    default="~/ray_results",
    help="directory passed to Tune as the results/log location")

args = parser.parse_args()
print(args)

# Values derived from the parsed options; later parts of the script read
# these module-level names.
agentsize = args.agentsize
# argparse yields an int here; any non-zero value turns the yellow phase on.
yellow = bool(args.yellow)
# global_re1 is smaller when the yellow phase is enabled.
global_re1 = 0.01 if yellow else 0.1
iterations = args.it
# Either three fixed seeds or just the first one, depending on --seeds.
seeds = [126291, 241241516, 10274] if args.seeds else [126291]
logpath = args.path


def env_creator(args):
    """Create the road environment wrapped in ``ParallelPettingZooEnv``.

    ``args`` is accepted so the function can serve as an environment-creator
    callback, but it is not read: every setting comes from the module-level
    values ``agentsize``, ``yellow`` and ``global_re1`` plus fixed constants.
    """
    road_env = ZooIntegerRoadEnv(
        agentsize=agentsize,
        measure=True,
        yellow=yellow,
        global_re1=global_re1,
        global_re2=0.1,
        episode_length=50,
    )
    return ParallelPettingZooEnv(road_env)

# Build one environment instance up front; below it is used to read the
# observation and action spaces for the policy specification.
env = env_creator({})

# Make the creator available under the name used in the trainer config.
register_env("ZooIntegerRoad", env_creator)

obs_space, act_space = env.observation_space, env.action_space

# A single policy entry keyed "shared_policy", built from the spaces above.
shared_policy_spec = (None, obs_space, act_space, {})
policies = {"shared_policy": shared_policy_spec}

# for all methods
policy_ids = list(policies)

# Run one PPO experiment per seed, one after another. The stop criterion and
# the agent count come from the command-line values (``iterations`` and
# ``agentsize``) rather than hard-coded numbers, so --it and --agentsize
# actually take effect.
for seed in seeds:
    ppo_config = {
        "env": "ZooIntegerRoad",
        "env_config": {
            "agentsize": agentsize,
            "yellow": yellow,
            "global_re1": global_re1,
        },
        "framework": "torch",
        "seed": seed,
        "num_workers": 1,
        "num_gpus": 1.0,
        "rollout_fragment_length": 50,
        "train_batch_size": 4000,
        "batch_mode": "complete_episodes",
        "observation_filter": "NoFilter",
        # NOTE(review): RLlib documents logger_config as a dict; confirm that
        # passing the string "pretty_print" is intended.
        "logger_config": "pretty_print",
        "multiagent": {
            "policies": policies,
            # Every agent maps to the single shared policy. Extra positional
            # or keyword arguments are accepted and ignored, so the callback
            # works whether it is called with only the agent id or with
            # additional context.
            "policy_mapping_fn": (
                lambda agent_id, *_args, **_kwargs: "shared_policy"),
        },
    }
    tune.run(
        "PPO",
        config=ppo_config,
        local_dir=logpath,
        stop={"training_iteration": iterations},
        checkpoint_at_end=True)

