import numpy as np
from multiagent.core import World, Agent, Landmark
from multiagent.scenario import BaseScenario


class Scenario(BaseScenario):
    """Three-agent, single-step matrix game.

    Each agent owns one slice of ``world.payoff_matrix``; its reward is
    looked up from that slice using the joint action of all three agents.
    Agents are immovable and silent, and every episode terminates after a
    single step (``done`` always returns ``True``).
    """

    def make_world(self):
        """Create the world, its three agents and the payoff tensor.

        Returns:
            The configured ``World`` instance, already reset once.
        """
        world = World()
        # set any world properties first
        num_agents = 3
        world.collaborative = False
        # add agents
        world.agents = [Agent() for _ in range(num_agents)]
        world.num_agents = num_agents
        for i, agent in enumerate(world.agents):
            agent.name = 'agent %d' % i
            agent.index = i  # integer position of the agent in world.agents
            agent.obs_dim = 2
            # Agent 1 has two actions, the others three; this matches the
            # (3, 2, 3) action axes of the payoff tensor below.
            agent.act_dim = 2 if i == 1 else 3
            agent.movable = False
            agent.silent = True
            agent.tp = 0
        # make initial conditions
        world.neigh_comm = 1.0
        world.sight = world.neigh_comm

        # payoff_matrix[k][a0, a1, a2] is the reward of agent k when agents
        # 0, 1 and 2 play actions a0, a1 and a2 respectively.
        world.payoff_matrix = np.zeros((num_agents, 3, 2, 3))
        world.payoff_matrix[0] = [[[-7, 6, 6], [9, 0, 0]],
                                  [[-1, 6, -2], [6, 7, 6]],
                                  [[-1, -1, 5], [-1, 0, 5]]]
        world.payoff_matrix[1] = [[[-7, -2, -2], [9, 6, -2]],
                                  [[5, 6, -2], [0, 7, -2]],
                                  [[5, -2, -2], [-2, 6, -2]]]
        world.payoff_matrix[2] = [[[10, 6, 6], [-4, 6, -2]],
                                  [[6, 0, 0], [6, 6, 6]],
                                  [[6, 0, 0], [0, 6, -4]]]

        self.reset_world(world)
        return world

    def reset_world(self, world):
        """Assign every agent a fresh uniform random position in [-1, 1)."""
        for agent in world.agents:
            agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)

    def benchmark_data(self, agent, world):
        """Return placeholder benchmarking data for ``agent``.

        The tuple layout ``(rew, collisions, min_dists, occupied_landmarks)``
        is kept, but this scenario defines no landmarks or collisions, and
        the reward cannot be computed here because the joint action is not
        available. All four entries are therefore reported as zero.

        Previously this method referenced undefined local names and raised
        ``NameError`` whenever it was called.
        """
        rew = 0
        collisions = 0
        min_dists = 0
        occupied_landmarks = 0
        return (rew, collisions, min_dists, occupied_landmarks)

    def neighb(self, agent, world):
        """Mark every agent as a neighbour of ``agent``.

        Stores a vector of ones of length ``world.num_agents`` on
        ``agent.neib`` and returns it.
        """
        agent.neib = np.ones(world.num_agents)
        return agent.neib

    def reward(self, agent, actions, world):
        """Return the local reward of ``agent`` for a joint action.

        Args:
            agent: The agent whose payoff is requested; ``agent.index``
                selects its slice of ``world.payoff_matrix``.
            actions: Indexable holding the chosen action of agents 0, 1 and
                2 at positions 0, 1 and 2. Each entry must be usable as an
                array index into the corresponding payoff axis.
            world: The world holding ``payoff_matrix``.

        Returns:
            The payoff entry for ``agent`` under the joint action.
        """
        # The payoff slice is selected directly by the agent's index, which
        # make_world assigns as 0, 1 or 2.
        payoff = world.payoff_matrix[agent.index]
        return payoff[actions[0], actions[1], actions[2]]

    def observation(self, agent, world):
        """Return the agent's position as its observation vector."""
        return np.concatenate([agent.state.p_pos])

    def done(self, agent, world):
        """Always report the episode as finished."""
        return True

