import numpy as np
from multiagent.core import World, Agent, Landmark
from multiagent.scenario import BaseScenario


class Scenario(BaseScenario):
    """Two-agent, three-action matrix-game scenario.

    Each agent's reward is looked up in a per-agent payoff table stored on
    the world as ``world.payoff_matrix`` (shape ``(2, 3, 3)``), indexed by
    the first two entries of the joint ``actions`` sequence.
    """

    def make_world(self):
        """Create the world, its two agents and the payoff tables.

        Returns:
            The populated ``World`` after ``reset_world`` has been applied.
        """
        num_agents = 2
        obs_dim = 2
        act_dim = 3

        world = World()
        # set any world properties first
        world.obs_dim = obs_dim
        world.act_dim = act_dim
        world.collaborative = False
        # add agents
        world.agents = [Agent() for _ in range(num_agents)]
        world.num_agents = num_agents
        for i, agent in enumerate(world.agents):
            agent.name = 'agent %d' % i
            agent.index = i  # integer id; selects the payoff table in reward()
            agent.obs_dim = obs_dim
            agent.act_dim = act_dim
            agent.movable = False
            agent.silent = True
            # NOTE(review): meaning of `tp` is not visible in this file -- confirm
            agent.tp = 0
        # make initial conditions
        # NOTE(review): `neigh_comm` / `sight` look like communication and
        # visibility radii consumed elsewhere -- confirm against the callers.
        world.neigh_comm = 1.0
        world.sight = world.neigh_comm

        # payoff_matrix[k][a0, a1] is the reward paid to agent k when the
        # joint action is (a0, a1); see reward().
        world.payoff_matrix = np.zeros((num_agents, act_dim, act_dim))
        world.payoff_matrix[0] = [[20, 0, 0], [30, 10, 0], [0, 0, 5]]
        world.payoff_matrix[1] = [[15, 0, 0], [0, 5, 0], [0, 0, 10]]

        self.reset_world(world)
        return world

    def reset_world(self, world):
        """Assign every agent a fresh uniform random position in [-1, 1)."""
        for agent in world.agents:
            agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)

    def benchmark_data(self, agent, world):
        """Benchmark hook; not supported by this scenario.

        The previous body returned four names that were never defined, so
        every call failed with ``NameError``. This scenario has no landmarks
        or collisions to report, so the failure is now explicit.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            'benchmark_data is not available for the matrix-game scenario: '
            'it has no landmarks or collisions to report'
        )

    def neighb(self, agent, world):
        """Mark every agent as a neighbour of ``agent``.

        Stores an all-ones vector of length ``world.num_agents`` on
        ``agent.neib`` and returns that same array.
        """
        agent.neib = np.ones(world.num_agents)
        return agent.neib

    def reward(self, agent, actions, world):
        """Return the local reward of ``agent`` for the joint ``actions``.

        Args:
            agent: the agent being rewarded; only ``agent.index`` is read.
            actions: indexable joint action; ``actions[0]`` and
                ``actions[1]`` must be valid integer indices into the
                payoff table (presumably the choices of agent 0 and
                agent 1 -- confirm against the caller).
            world: the world holding ``payoff_matrix``.

        Returns:
            The payoff-table entry for this agent and joint action.
        """
        # Agent 0 uses table 0; any other agent uses table 1.
        table = 0 if agent.index == 0 else 1
        return world.payoff_matrix[table][actions[0], actions[1]]

    def observation(self, agent, world):
        """Return a new array holding the agent's current position."""
        return np.concatenate([agent.state.p_pos])

    def done(self, agent, world):
        """Always report the episode as finished.

        NOTE(review): presumably because the matrix game is single-shot --
        confirm with the training loop.
        """
        return True

