"""
Code for creating a multiagent environment with one of the scenarios listed
in ./scenarios/.
Can be called by using, for example:
    env = make_env('simple_speaker_listener')
After producing the env object, can be used similarly to an OpenAI gym
environment.

A policy using this environment must output actions in the form of a list
for all agents. Each element of the list should be a numpy array,
of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede
communication actions in this array. See environment.py for more details.
"""
import time

import numpy as np


def make_env(scenario_name, benchmark=False):
    """
    Build and return a MultiAgentEnv for one of the scenarios in ./scenarios/.

    The returned env can be driven much like a gym environment: call
    env.reset() and env.step(), and env.render() to view it on the screen.

    Input:
        scenario_name   :   name of the scenario from ./scenarios/ to be
                            loaded (without the .py extension)
        benchmark       :   whether you want to produce benchmarking data
                            (usually only done during evaluation); when true,
                            scenario.benchmark_data is additionally passed to
                            the environment

    Some useful env properties (see environment.py):
        .observation_space  :   the observation space for each agent
        .action_space       :   the action space for each agent
        .n                  :   the number of agents
    """
    # Function-local imports: the multiagent package is only needed once an
    # environment is actually requested.
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    # The scenario script exposes a Scenario class; instantiate it and let it
    # build the world the environment will simulate.
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()

    # Callbacks are handed to MultiAgentEnv positionally, right after the
    # world. The benchmark-data callback is appended only on request, so the
    # non-benchmark call receives exactly three callbacks.
    callbacks = [scenario.reset_world, scenario.reward, scenario.observation]
    if benchmark:
        callbacks.append(scenario.benchmark_data)
    return MultiAgentEnv(world, *callbacks)


# Ad-hoc smoke check: build the 'exp_tag' scenario and print per-agent
# action/observation space information.
# NOTE(review): these statements run at import time, so importing this module
# just to reuse make_env() also builds an environment and prints to stdout.
# Consider moving them under an `if __name__ == "__main__":` guard — confirm
# that nothing relies on the module-level `env`/`act`/`obs` names first.
env = make_env('exp_tag')
# Presumably switches the env to integer (index) actions instead of action
# vectors — see environment.py; TODO confirm.
env.discrete_action_input = True
act = env.action_space
obs = env.observation_space
# One line per agent: agent index, `.n` of its action space (presumably the
# number of discrete actions — confirm) and the shape of its observation space.
for i, (a, o) in enumerate(zip(act, obs)):
    print(i, a.n, o.shape)
# `.n` of the first agent's action space on its own line.
print(act[0].n)
# Disabled example kept for reference: roll out one episode with uniformly
# random integer actions for every agent until all agents report done.
# print(len(obs), obs[0].shape)
# _ = env.reset()[:20]
# done = False
# while not done:
#     action = [np.random.randint(act[0].n) for _ in range(env.n)]
#     obs, r, done_n, info = env.step(action)
#     done = all(done_n)
#     time.sleep(0.1)
#     # vis = env.render()
#     # print(len(vis), vis)
#     # print("obs:{}\nreward:{}\ndone:{}\ninfo:{}".format(len(obs), r, done, info))
