from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os

from ..multiplayer_multiagentenv import MultiPlayer_MultiAgentEnv
from ..starcraft2.maps import get_map_params

import pdb
import atexit
import random
from warnings import warn
from operator import attrgetter
from copy import deepcopy
import numpy as np
import enum
import math
from absl import logging

from pysc2 import maps
from pysc2 import run_configs
from pysc2.lib import protocol

from s2clientprotocol import common_pb2 as sc_common
from s2clientprotocol import sc2api_pb2 as sc_pb
from s2clientprotocol import raw_pb2 as r_pb
from s2clientprotocol import debug_pb2 as d_pb

from . import portspicker
from . import run_parallel
from ..scripts import SCRIPT_DICT

# Single-letter race codes mapped to the s2clientprotocol race enum values.
races = {
    "R": sc_common.Random,
    "P": sc_common.Protoss,
    "T": sc_common.Terran,
    "Z": sc_common.Zerg,
}

# Single-character difficulty codes ("1"-"9" and "A") mapped to the sc2api
# difficulty enum values.
difficulties = {
    "1": sc_pb.VeryEasy,
    "2": sc_pb.Easy,
    "3": sc_pb.Medium,
    "4": sc_pb.MediumHard,
    "5": sc_pb.Hard,
    "6": sc_pb.Harder,
    "7": sc_pb.VeryHard,
    "8": sc_pb.CheatVision,
    "9": sc_pb.CheatMoney,
    "A": sc_pb.CheatInsane,
}

# Command names mapped to the integer passed as ``ability_id`` when building
# raw unit commands.
actions = {
    "move": 16,  # target: PointOrUnit
    "attack": 23,  # target: PointOrUnit
    "stop": 4,  # target: None
    "heal": 386,  # Unit
}


class Direction(enum.IntEnum):
    """The four compass directions, encoded as the integers 0 to 3."""

    NORTH, SOUTH, EAST, WEST = range(4)


class StarCraft2Env(MultiPlayer_MultiAgentEnv):
    """The StarCraft II environment for decentralised multi-agent
    micromanagement scenarios.
    """

    def __init__(
        self,
        map_name="8m",
        step_mul=8,
        move_amount=2,
        difficulty="7",
        game_version=None,
        seed=None,
        continuing_episode=False,
        obs_all_health=True,
        obs_own_health=True,
        obs_last_action=False,
        obs_pathing_grid=False,
        obs_terrain_height=False,
        obs_instead_of_state=False,
        obs_timestep_number=False,
        state_last_action=True,
        state_timestep_number=False,
        reward_sparse=False,
        reward_only_positive=True,
        reward_death_value=10,
        reward_win=200,
        reward_defeat=0,
        reward_negative_scale=0.5,
        reward_scale=True,
        reward_scale_rate=20,
        replay_dir="",
        replay_prefix="",
        window_size_x=640,
        window_size_y=480,
        heuristic_ai=False,
        heuristic_rest=False,
        debug=False,
        mode='single',
        strategy_prob =  None,
    ):
        """
        Create a StarCraft2Env environment.

        Only stores configuration and initialises bookkeeping; the game
        itself is launched lazily by the first call to ``reset``.

        Parameters
        ----------
        map_name : str, optional
            The name of the SC2 map to play (default is "8m"). The full list
            can be found by running bin/map_list.
        step_mul : int, optional
            How many game steps per agent step (default is 8). The value is
            passed unchanged to each controller's ``step`` call.
        move_amount : float, optional
            How far away units are ordered to move per step (default is 2).
        difficulty : str, optional
            The difficulty of built-in computer AI bot (default is "7").
            NOTE(review): the value is stored, but ``_launch`` creates both
            players as participants, so no built-in bot is configured there.
        game_version : str, optional
            StarCraft II game version (default is None). None indicates the
            latest version.
        seed : int, optional
            Random seed used during game initialisation (default is None).
            It is passed as ``random_seed`` when the game is created.
        continuing_episode : bool, optional
            Whether to consider episodes continuing or finished after time
            limit is reached (default is False).
        obs_all_health : bool, optional
            Agents receive the health of all units (in the sight range) as part
            of observations (default is True).
        obs_own_health : bool, optional
            Agents receive their own health as a part of observations (default
            is True). This flag is forced to True when obs_all_health == True.
        obs_last_action : bool, optional
            Agents receive the last actions of all units (in the sight range)
            as part of observations (default is False).
        obs_pathing_grid : bool, optional
            Whether observations include pathing values surrounding the agent
            (default is False).
        obs_terrain_height : bool, optional
            Whether observations include terrain height values surrounding the
            agent (default is False).
        obs_instead_of_state : bool, optional
            Use combination of all agents' observations as the global state
            (default is False).
        obs_timestep_number : bool, optional
            Whether observations include the current timestep of the episode
            (default is False).
        state_last_action : bool, optional
            Include the last actions of all agents as part of the global state
            (default is True).
        state_timestep_number : bool, optional
            Whether the state include the current timestep of the episode
            (default is False).
        reward_sparse : bool, optional
            Receive 1/-1 reward for winning/losing an episode (default is
            False). The rest of reward parameters are ignored if True.
        reward_only_positive : bool, optional
            Reward is always positive (default is True).
        reward_death_value : float, optional
            The amount of reward received for killing an enemy unit (default
            is 10). This is also the negative penalty for having an allied unit
            killed if reward_only_positive == False.
        reward_win : float, optional
            The reward for winning in an episode (default is 200).
        reward_defeat : float, optional
            The reward for losing in an episode (default is 0). This value
            should be nonpositive.
        reward_negative_scale : float, optional
            Scaling factor for negative rewards (default is 0.5). This
            parameter is ignored when reward_only_positive == True.
        reward_scale : bool, optional
            Whether or not to scale the reward (default is True).
        reward_scale_rate : float, optional
            Reward scale rate (default is 20). When reward_scale == True, the
            reward received by the agents is divided by (max_reward /
            reward_scale_rate), where max_reward is the maximum possible
            reward per episode without considering the shield regeneration
            of Protoss units.
        replay_dir : str, optional
            The directory to save replays (default is ""). If unset, the
            replay will be saved in Replays directory where StarCraft II is
            installed.
        replay_prefix : str, optional
            The prefix of the replay to be saved (default is ""). If unset,
            the name of the map will be used.
        window_size_x : int, optional
            The width of StarCraft II window size (default is 640).
        window_size_y: int, optional
            The height of StarCraft II window size (default is 480).
        heuristic_ai: bool, optional
            Whether or not to use a non-learning heuristic AI (default False).
        heuristic_rest: bool, optional
            At any moment, restrict the actions of the heuristic AI to be
            chosen from actions available to RL agents (default is False).
            Ignored if heuristic_ai == False.
        debug: bool, optional
            Log messages about observations, state, actions and rewards for
            debugging purposes (default is False).
        mode : str, optional
            Either 'single' or 'multi' (default is 'single'); any other value
            fails an assertion. In 'single' mode ``step`` takes the actions of
            player 0 only and player 1 is driven by a script or by random
            actions; in 'multi' mode ``step`` takes one action list per
            player.
        strategy_prob : list, optional
            Selection weights for the scripts registered for the map in
            ``SCRIPT_DICT`` (default is None). Used by ``reset`` in 'single'
            mode when it is a list; otherwise a script is chosen uniformly.
        """
        # Map arguments
        self.map_name = map_name
        map_params = get_map_params(self.map_name)
        # Per-player lists are indexed by player: 0 is the map's "agents"
        # side, 1 is its "enemies" side.
        self.n_agents_list = [map_params["n_agents"], map_params["n_enemies"]]
        self.episode_limit = map_params["limit"]
        self._move_amount = move_amount
        self._step_mul = step_mul
        self.difficulty = difficulty
        self.players = 2

        # Observations and state
        self.obs_own_health = obs_own_health
        self.obs_all_health = obs_all_health
        self.obs_instead_of_state = obs_instead_of_state
        self.obs_last_action = obs_last_action
        self.obs_pathing_grid = obs_pathing_grid
        self.obs_terrain_height = obs_terrain_height
        self.obs_timestep_number = obs_timestep_number
        self.state_last_action = state_last_action
        self.state_timestep_number = state_timestep_number
        # Observing everyone's health implies observing one's own.
        if self.obs_all_health:
            self.obs_own_health = True
        self.n_obs_pathing = 8
        self.n_obs_height = 9

        # Rewards args
        self.reward_sparse = reward_sparse
        self.reward_only_positive = reward_only_positive
        self.reward_negative_scale = reward_negative_scale
        self.reward_death_value = reward_death_value
        self.reward_win = reward_win
        self.reward_defeat = reward_defeat
        self.reward_scale = reward_scale
        self.reward_scale_rate = reward_scale_rate

        # Other
        self.game_version = game_version
        self.continuing_episode = continuing_episode
        self._seed = seed
        self.heuristic_ai = heuristic_ai
        self.heuristic_rest = heuristic_rest
        self.debug = debug
        self.window_size = (window_size_x, window_size_y)
        self.replay_dir = replay_dir
        self.replay_prefix = replay_prefix

        # Actions
        self.n_actions_no_attack = 6
        self.n_actions_move = 4
        # NOTE(review): player 0 gets one slot per enemy plus one per own
        # agent on top of the non-attack actions, while player 1 only gets
        # one slot per player-0 agent -- confirm this asymmetry is intended.
        self.n_actions_list = [
            self.n_actions_no_attack + self.n_agents_list[1] + self.n_agents_list[0],
            self.n_actions_no_attack + self.n_agents_list[0],
        ]

        # Map info
        self._agents_race_list = [map_params["a_race"], map_params["b_race"]]
        # One shield bit for a Protoss ("P") side, none otherwise.
        self.shield_bits_agents_list = [
            (1 if race == "P" else 0) for race in self._agents_race_list
        ]
        self.unit_type_bits = map_params["unit_type_bits"]
        self.map_type = map_params["map_type"]
        self._unit_types = None

        # Per-player reward for killing every opposing unit and winning.
        self.max_reward_list = [
            (self.n_agents_list[1] * self.reward_death_value + self.reward_win),
            (self.n_agents_list[0] * self.reward_death_value + self.reward_win),
        ]

        # create lists containing the names of attributes returned in states
        self.agents_state_attr_names_list = [
            ["health", "energy/cooldown", "rel_x", "rel_y"] for _ in range(self.players)
        ]

        for i, shield_bits in enumerate(self.shield_bits_agents_list):
            if shield_bits > 0:
                self.agents_state_attr_names_list[i] += ["shield"]
        
        if self.unit_type_bits > 0:
            bit_attr_names = [
                "type_{}".format(bit) for bit in range(self.unit_type_bits)
            ]
            self.agents_state_attr_names_list[0] += bit_attr_names
            self.agents_state_attr_names_list[1] += bit_attr_names

        # Episode / statistics bookkeeping
        self._episode_count = 0
        self._episode_steps = 0
        self._total_steps = 0
        self._obs = None
        self.battles_won = [0, 0]
        self.battles_game = 0
        self.timeouts = 0
        self.force_restarts = 0
        self.last_stats = None
        self.death_tracker_agents_list = [np.zeros(n_agent) for n_agent in self.n_agents_list]
        self.previous_agents_dict_list = [None, None]
        
        # One (n_agents, n_actions) array of last actions per player.
        self.last_action_list = [
            np.zeros((n_agents, n_actions)) 
            for n_agents, n_actions in zip(self.n_agents_list, self.n_actions_list)
        ]
        self._min_unit_type = 0
        self.marine_id = self.marauder_id = self.medivac_id = 0
        self.hydralisk_id = self.zergling_id = self.baneling_id = 0
        self.stalker_id = self.colossus_id = self.zealot_id = 0
        # Map geometry and grids; filled in by _launch once the game exists.
        self.max_distance_x = 0
        self.max_distance_y = 0
        self.map_x = 0
        self.map_y = 0
        self.reward = 0
        self.renderer = None
        self.terrain_height = None
        self.pathing_grid = None
        # Game process handles; filled in by _launch.
        self._run_config = None
        self._sc2_procs = None
        self._controllers = None

        self.mode = mode
        self.strategy_prob = strategy_prob
        assert self.mode in ['single', 'multi']

        # Try to avoid leaking SC2 processes on shutdown
        atexit.register(lambda: self.close())


    def _launch(self):
        """Launch the StarCraft II game.

        Starts one SC2 process per player, creates a two-participant game on
        the first controller, joins every controller to it, and caches the
        map geometry (``max_distance_x/y``, ``map_x/y``), ``pathing_grid``
        and ``terrain_height`` from the game info.

        A failed attempt is logged, the resources acquired by that attempt
        are released, and the launch is retried until it succeeds. Only
        ``Exception`` subclasses trigger a retry, so ``KeyboardInterrupt``
        and ``SystemExit`` propagate instead of being swallowed by the loop.
        """
        while True:
            # Resources acquired by *this* attempt. The failure handler only
            # releases these, so it never touches attributes that were not
            # set yet or that belong to an already torn-down game.
            parallel = None
            ports = None
            procs = []
            try:
                parallel = self.parallel = run_parallel.RunParallel()
                self._run_config = run_configs.get(version=self.game_version)
                _map = maps.get(self.map_name)

                # Setting up the interface
                interface_options = sc_pb.InterfaceOptions(raw=True, score=False)

                ports = self.ports = portspicker.pick_unused_ports(
                    self.players * 2
                )
                # Start the processes one at a time so that an earlier
                # process is still tracked (and closed) if a later one fails.
                for _ in range(self.players):
                    procs.append(
                        self._run_config.start(
                            window_size=self.window_size, want_rgb=False
                        )
                    )
                self._sc2_procs = procs
                self._controllers = [p.controller for p in procs]

                map_path = os.path.basename(_map.path)

                for c in self._controllers:  # Skip parallel due to a race condition on Windows.
                    c.save_map(map_path, _map.data(self._run_config))

                # Request to create the game
                create = sc_pb.RequestCreateGame(
                    local_map=sc_pb.LocalMap(
                        map_path=_map.path,
                        map_data=self._run_config.map_data(_map.path),
                    ),
                    realtime=True,
                    random_seed=self._seed,
                )
                # Both sides are participants; no built-in computer player
                # is added.
                create.player_setup.add(type=sc_pb.Participant)
                create.player_setup.add(type=sc_pb.Participant)

                join = sc_pb.RequestJoinGame(
                    race=sc_common.Random, options=interface_options
                )

                join.shared_port = 0  # unused
                join.server_ports.game_port = self.ports[0]
                join.server_ports.base_port = self.ports[1]
                join.client_ports.add(game_port=self.ports[2], base_port=self.ports[3])

                self._controllers[0].create_game(create)

                # All controllers join at the same time.
                self.parallel.run((c.join_game, join) for c in self._controllers)

                game_info = self._controllers[0].game_info()

                map_info = game_info.start_raw
                map_play_area_min = map_info.playable_area.p0
                map_play_area_max = map_info.playable_area.p1
                self.max_distance_x = map_play_area_max.x - map_play_area_min.x
                self.max_distance_y = map_play_area_max.y - map_play_area_min.y
                self.map_x = map_info.map_size.x
                self.map_y = map_info.map_size.y

                if map_info.pathing_grid.bits_per_pixel == 1:
                    # One bit per cell: unpack every byte, most significant
                    # bit first.
                    vals = np.array(list(map_info.pathing_grid.data)).reshape(
                        self.map_x, int(self.map_y / 8)
                    )
                    self.pathing_grid = np.transpose(
                        np.array(
                            [
                                [(b >> i) & 1 for b in row for i in range(7, -1, -1)]
                                for row in vals
                            ],
                            dtype=bool,
                        )
                    )
                else:
                    self.pathing_grid = np.invert(
                        np.flip(
                            np.transpose(
                                np.array(
                                    list(map_info.pathing_grid.data), dtype=bool
                                ).reshape(self.map_x, self.map_y)
                            ),
                            axis=1,
                        )
                    )

                # Terrain height values divided by 255.
                self.terrain_height = (
                    np.flip(
                        np.transpose(
                            np.array(list(map_info.terrain_height.data)).reshape(
                                self.map_x, self.map_y
                            )
                        ),
                        1,
                    )
                    / 255
                )
                break
            except Exception:
                logging.exception(
                    "Failed to launch StarCraft II; cleaning up and retrying."
                )
                # Same teardown order as before: quit every controller, then
                # close every process, then release ports and the runner.
                for p in procs:
                    p.controller.quit()
                for p in procs:
                    p.close()
                # Nothing is running any more; do not leave stale handles.
                self._controllers = None
                self._sc2_procs = None
                if ports is not None:
                    portspicker.return_ports(ports)
                if parallel is not None:
                    parallel.shutdown()
         

    def reset(self):
        """Reset the environment. Required after each full episode.

        Launches the game on the very first episode and restarts the
        episode in place afterwards, clears the per-episode bookkeeping and
        fetches fresh observations from every controller.

        Returns
        -------
        tuple
            In 'single' mode ``(self.get_obs(), self.get_state())``; a
            script for player 1 is also (re)selected from
            ``SCRIPT_DICT[self.map_name]``, weighted by ``strategy_prob``
            when that is a list and uniformly otherwise. In 'multi' mode a
            pair of lists holding ``get_obs(i)`` and ``get_state(i)`` for
            every player ``i``.
        """
        self._episode_steps = 0
        if self._episode_count == 0:
            # Launch StarCraft II
            self._launch()
        else:
            self._restart()

        # Information kept for counting the reward
        self.death_tracker_agents_list = [np.zeros(self.n_agents_list[i]) for i in range(self.players)]
        self.previous_agents_dict_list = [None, None]
        self.win_counted = [False, False]
        self.defeat_counted = [False, False]

        self.last_action_list = [np.zeros((self.n_agents_list[i], self.n_actions_list[i])) for i in range(self.players)]

        if self.heuristic_ai:
            self.heuristic_targets_list = [[None] * self.n_agents_list[i] for i in range(self.players)]

        try:
            obs = self.parallel.run(c.observe for c in self._controllers)
            self._obs_list = [obs[0], obs[1]]
            self.init_units()
        except (protocol.ProtocolError, protocol.ConnectionError):
            # NOTE(review): after a full restart the observations are not
            # re-fetched and init_units() is not re-run here.
            self.full_restart()

        if self.debug:
            logging.debug(
                "Started Episode {}".format(self._episode_count).center(
                    60, "*"
                )
            )

        if self.mode == 'single':
            scripts = SCRIPT_DICT[self.map_name]
            if isinstance(self.strategy_prob, list):
                # random.choices returns a list of picks, so take its single
                # element before instantiating the script.
                script_cls = random.choices(
                    scripts, weights=self.strategy_prob
                )[0]
            else:
                script_cls = random.choice(scripts)
            self.dts_script = script_cls(self.map_name)

            return self.get_obs(), self.get_state()

        return [self.get_obs(i) for i in range(self.players)], [self.get_state(i) for i in range(self.players)]

    def _restart(self):
        """Start a new episode without relaunching the game.

        All units on the map are killed and every controller is then
        advanced by ``self._step_mul`` game steps; a trigger in the SC2Map
        file restarts the episode once no units are left. If the game
        connection fails along the way, a full restart is performed instead.
        """
        try:
            self._kill_all_units()
            step_requests = (
                (controller.step, self._step_mul)
                for controller in self._controllers
            )
            self.parallel.run(step_requests)
        except (protocol.ProtocolError, protocol.ConnectionError):
            self.full_restart()

    def full_restart(self):
        """Tear down the running SC2 processes and launch fresh ones.

        Quits every controller, closes every process, hands the reserved
        ports back, shuts the parallel runner down, relaunches the game and
        finally counts the forced restart.
        """
        # Ask each client to quit before its process is closed.
        for controller in self._controllers:
            controller.quit()
        for proc in self._sc2_procs:
            proc.close()

        # Release the helpers that belonged to the old game.
        portspicker.return_ports(self.ports)
        self.parallel.shutdown()

        self._launch()
        self.force_restarts = self.force_restarts + 1

    def get_random_blue_actions(self):
        blue_actions = []
        for agent_id in range(self.n_agents_list[1]):
            avail_actions = self.get_avail_agent_actions(agent_id, 1)
            avail_actions_ind = np.nonzero(avail_actions)[0]
            action = np.random.choice(avail_actions_ind)
            blue_actions.append(action)
        blue_actions_int = [int(a) for a in blue_actions]
        blue_sc_actions = self.actions2sc_action(blue_actions_int, 1)
        return blue_sc_actions
        
    def actions2sc_action(self, actions_int, player_id):
        sc_actions  = []
        for a_id, action in enumerate(actions_int):
            if not self.heuristic_ai:
                sc_action = self.get_agent_action(a_id, action, player_id)

            else:
                sc_action, action_num = self.get_agent_action_heuristic(
                    a_id, action, 0
                )
                actions[a_id] = action_num
            if sc_action:
                sc_actions.append(sc_action)
        return sc_actions

    def step(self, actions):
        """A single environment step.

        Parameters
        ----------
        actions : sequence
            In 'single' mode, one action index per agent of player 0;
            player 1 is driven by ``self.dts_script`` or, when that is None,
            by random available actions. In 'multi' mode, one sequence of
            action indices per player.

        Returns
        -------
        tuple
            ``(reward, terminated, info)``. In 'single' mode ``reward`` and
            ``terminated`` are scalars for player 0 and every per-player
            list in ``info`` is reduced to player 0's entry. In 'multi' mode
            ``reward`` and ``terminated`` are two-element lists and ``info``
            keeps its per-player lists. If the game connection fails, the
            game is fully restarted and a zero-reward, terminated step with
            an empty ``info`` is returned.

        Raises
        ------
        ValueError
            If ``self.mode`` is neither 'single' nor 'multi'.
        """
        if self.mode == 'single':
            red_actions_int = [int(a) for a in actions]
            red_sc_actions = self.actions2sc_action(red_actions_int, 0)

            if self.dts_script is not None:
                blue_sc_actions = self.dts_script.script(self._obs_list[1], self._episode_steps)
            else:
                blue_sc_actions = self.get_random_blue_actions()

            self.last_action_list = [np.eye(self.n_actions_list[0] + 1)[np.array(red_actions_int)], None]
        elif self.mode == 'multi':
            actions_int_list = [[int(a) for a in action] for action in actions]
            self.last_action_list = [np.eye(self.n_actions_list[i])[np.array(actions_int_list[i])] for i in range(self.players)]
            red_sc_actions, blue_sc_actions = [self.actions2sc_action(actions_int_list[i], i) for i in range(self.players)]
        else:
            # A bare ``raise`` here has no active exception to re-raise and
            # would only produce an opaque RuntimeError.
            raise ValueError(
                "Unknown mode {!r}; expected 'single' or 'multi'".format(self.mode)
            )

        # Collect individual actions
        if self.debug:
            logging.debug("Actions".center(60, "-"))

        red_req_actions = sc_pb.RequestAction(actions=red_sc_actions)
        blue_req_actions = sc_pb.RequestAction(actions=blue_sc_actions)
        all_actions = [red_req_actions, blue_req_actions]

        try:
            self._obs_list = []

            # Send each player's actions to its own controller.
            self.parallel.run((c.actions, a) for c, a in zip(self._controllers, all_actions))

            # Make step in SC2, i.e. apply actions
            self.parallel.run((c.step, self._step_mul) for c in self._controllers)

            # Observe here so that we know if the episode is over.
            obs = self.parallel.run(c.observe for c in self._controllers)
            self._obs_list = [obs[0], obs[1]]
        except (protocol.ProtocolError, protocol.ConnectionError):
            self.full_restart()
            if self.mode == 'single':
                return 0, True, {}
            # Keep the per-player shape used by the normal 'multi' return.
            return [0, 0], [True, True], {}

        self._total_steps += 1
        self._episode_steps += 1

        # Update units
        game_end_codes = self.update_units()

        terminated = False
        rewards = self.reward_battle()
        info = {"battle_won": [False, False]}

        # Count, from each player's point of view, the units with no health
        # left.
        dead_allies, dead_enemies = [0, 0], [0, 0]
        for p in range(self.players):
            units = self.agents_dict_list[p]
            dead_allies[p] = sum(
                1 for al_unit in units['agents'].values() if al_unit.health == 0
            )
            dead_enemies[p] = sum(
                1 for e_unit in units['enemies'].values() if e_unit.health == 0
            )

        info["dead_allies"] = dead_allies
        info["dead_enemies"] = dead_enemies

        if game_end_codes is not None:
            # Battle is over
            terminated = True
            self.battles_game += 1
            for i, game_end_code in enumerate(game_end_codes):
                if game_end_code == 1 and not self.win_counted[i]:
                    self.battles_won[i] += 1
                    self.win_counted[i] = True
                    info["battle_won"][i] = True
                    if not self.reward_sparse:
                        rewards[i] += self.reward_win
                    else:
                        rewards[i] = 1
                elif game_end_code == -1 and not self.defeat_counted[i]:
                    self.defeat_counted[i] = True
                    if not self.reward_sparse:
                        rewards[i] += self.reward_defeat
                    else:
                        rewards[i] = -1
        elif self._episode_steps >= self.episode_limit:
            # Episode limit reached
            terminated = True
            if self.continuing_episode:
                info["episode_limit"] = True
            self.battles_game += 1
            self.timeouts += 1

        if self.debug:
            logging.debug("Reward = {}".format(rewards).center(60, "-"))

        if terminated:
            self._episode_count += 1

        if self.reward_scale:
            for i in range(self.players):
                rewards[i] /= self.max_reward_list[i] / self.reward_scale_rate

        self.rewards = rewards

        if self.mode == 'single':
            # Reduce per-player lists to player 0's entry; scalar entries
            # such as "episode_limit" are not subscriptable and pass through
            # unchanged.
            info = {
                key: value[0] if isinstance(value, list) else value
                for key, value in info.items()
            }
            return rewards[0], terminated, info
        return rewards, [terminated, terminated], info
            

    def get_agent_action(self, a_id, action, player_id):
        """Construct the action for agent a_id.

        Parameters
        ----------
        a_id : int
            Index of the acting agent within its player's units.
        action : int
            Discrete action index: 0 no-op (dead agents only), 1 stop,
            2-5 move north/south/east/west by ``self._move_amount``, 9 a
            hard-coded stimpack command, any other value attack (or heal,
            for a medivac on an "MMM" map) the unit with index
            ``action - self.n_actions_no_attack``.
        player_id : int
            Index of the player the agent belongs to.

        Returns
        -------
        An ``sc_pb.Action`` wrapping the raw unit command, or None for the
        no-op action.

        Raises
        ------
        AssertionError
            If the action is not currently available to the agent, or if
            the no-op is requested for an agent that still has health.
        """
        avail_actions = self.get_avail_agent_actions(a_id, player_id)
        assert (
            avail_actions[action] == 1
        ), "Agent {} cannot perform action {}".format(a_id, action)

        unit = self.get_unit_by_id(a_id, player_id)
        tag = unit.tag
        x = unit.pos.x
        y = unit.pos.y

        if action == 0:
            # no-op (valid only when dead)
            assert unit.health == 0, "No-op only available for dead agents."
            if self.debug:
                logging.debug("Agent {}: Dead".format(a_id))
            return None
        elif action == 1:
            # stop
            cmd = r_pb.ActionRawUnitCommand(
                ability_id=actions["stop"],
                unit_tags=[tag],
                queue_command=False,
            )
            if self.debug:
                logging.debug("Agent {}: Stop".format(a_id))

        elif action == 2:
            # move north
            cmd = r_pb.ActionRawUnitCommand(
                ability_id=actions["move"],
                target_world_space_pos=sc_common.Point2D(
                    x=x, y=y + self._move_amount
                ),
                unit_tags=[tag],
                queue_command=False,
            )
            if self.debug:
                logging.debug("Agent {}: Move North".format(a_id))

        elif action == 3:
            # move south
            cmd = r_pb.ActionRawUnitCommand(
                ability_id=actions["move"],
                target_world_space_pos=sc_common.Point2D(
                    x=x, y=y - self._move_amount
                ),
                unit_tags=[tag],
                queue_command=False,
            )
            if self.debug:
                logging.debug("Agent {}: Move South".format(a_id))

        elif action == 4:
            # move east
            cmd = r_pb.ActionRawUnitCommand(
                ability_id=actions["move"],
                target_world_space_pos=sc_common.Point2D(
                    x=x + self._move_amount, y=y
                ),
                unit_tags=[tag],
                queue_command=False,
            )
            if self.debug:
                logging.debug("Agent {}: Move East".format(a_id))

        elif action == 5:
            # move west
            cmd = r_pb.ActionRawUnitCommand(
                ability_id=actions["move"],
                target_world_space_pos=sc_common.Point2D(
                    x=x - self._move_amount, y=y
                ),
                unit_tags=[tag],
                queue_command=False,
            )
            if self.debug:
                logging.debug("Agent {}: Move West".format(a_id))
        elif action == 9:
            # NOTE(review): with n_actions_no_attack == 6 this index would
            # otherwise be the attack/heal on target 3, so this branch
            # shadows it; it also prints unconditionally. Confirm intended.
            print('STIMPACK')
            cmd = r_pb.ActionRawUnitCommand(
                ability_id=3675,
                unit_tags=[tag],  # Empty means all selected units
                target_world_space_pos=None  # Not needed for stimpack
            )
            if self.debug:
                logging.debug("Agent{}: Use StimPack".format(a_id))
        else:
            # attack/heal units that are in range
            target_id = action - self.n_actions_no_attack
            # Medivacs on "MMM" maps target their own side (heal); every
            # other unit targets the opposing side (attack).
            if self.map_type == "MMM" and unit.unit_type == self.medivac_id:
                target_unit = self.agents_dict_list[player_id]['agents'][target_id]
                action_name = "heal"
            else:
                target_unit = self.agents_dict_list[player_id]['enemies'][target_id]
                action_name = "attack"

            action_id = actions[action_name]
            target_tag = target_unit.tag

            cmd = r_pb.ActionRawUnitCommand(
                ability_id=action_id,
                target_unit_tag=target_tag,
                unit_tags=[tag],
                queue_command=False,
            )

            if self.debug:
                logging.debug(
                    "Agent {} {}s unit # {}".format(
                        a_id, action_name, target_id
                    )
                )

        # Wrap the raw unit command in the generic SC2 action message.
        sc_action = sc_pb.Action(action_raw=r_pb.ActionRaw(unit_command=cmd))
        return sc_action

    def get_agent_action_heuristic(self, a_id, action, player_id):
        unit = self.get_unit_by_id(a_id)
        tag = unit.tag

        target = self.heuristic_targets[a_id]
        if unit.unit_type == self.medivac_id:
            if (
                target is None
                or self.agents_dict_list[player_id]['agents'][target].health == 0
                or self.agents_dict_list[player_id]['agents'][target].health == self.agents_dict_list[player_id]['agents'][target].health_max
            ):
                min_dist = math.hypot(self.max_distance_x, self.max_distance_y)
                min_id = -1
                for al_id, al_unit in self.agents_dict_list[player_id]['agents'].items():
                    if al_unit.unit_type == self.medivac_id:
                        continue
                    if (
                        al_unit.health != 0
                        and al_unit.health != al_unit.health_max
                    ):
                        dist = self.distance(
                            unit.pos.x,
                            unit.pos.y,
                            al_unit.pos.x,
                            al_unit.pos.y,
                        )
                        if dist < min_dist:
                            min_dist = dist
                            min_id = al_id
                self.heuristic_targets_list[player_id][a_id] = min_id
                if min_id == -1:
                    self.heuristic_targets_list[player_id][a_id] = None
                    return None, 0
            action_id = actions["heal"]
            target_tag = self.agents_dict_list[player_id]['agents'][self.heuristic_targets_list[player_id][a_id]].tag
        else:
            if target is None or self.agents_dict_list[player_id]['enemies'][target].health == 0:
                min_dist = math.hypot(self.max_distance_x, self.max_distance_y)
                min_id = -1
                for e_id, e_unit in self.agents_dict_list[player_id]['enemies'].items():
                    if (
                        unit.unit_type == self.marauder_id
                        and e_unit.unit_type == self.medivac_id
                    ):
                        continue
                    if e_unit.health > 0:
                        dist = self.distance(
                            unit.pos.x, unit.pos.y, e_unit.pos.x, e_unit.pos.y
                        )
                        if dist < min_dist:
                            min_dist = dist
                            min_id = e_id
                self.heuristic_targets_list[player_id][a_id] = min_id
                if min_id == -1:
                    self.heuristic_targets_list[player_id][a_id] = None
                    return None, 0
            action_id = actions["attack"]
            target_tag = self.agents_dict_list[player_id]['enemies'][self.heuristic_targets_list[player_id][a_id]].tag

        action_num = self.heuristic_targets_list[player_id][a_id] + self.n_actions_no_attack

        # Check if the action is available
        if (
            self.heuristic_rest
            and self.get_avail_agent_actions(a_id)[action_num] == 0
        ):

            # Move towards the target rather than attacking/healing
            if unit.unit_type == self.medivac_id:
                target_unit = self.agents_dict_list[player_id]['agents'][self.heuristic_targets_list[player_id][a_id]]
            else:
                target_unit = self.agents_dict_list[player_id]['enemies'][self.heuristic_targets_list[player_id][a_id]]

            delta_x = target_unit.pos.x - unit.pos.x
            delta_y = target_unit.pos.y - unit.pos.y

            if abs(delta_x) > abs(delta_y):  # east or west
                if delta_x > 0:  # east
                    target_pos = sc_common.Point2D(
                        x=unit.pos.x + self._move_amount, y=unit.pos.y
                    )
                    action_num = 4
                else:  # west
                    target_pos = sc_common.Point2D(
                        x=unit.pos.x - self._move_amount, y=unit.pos.y
                    )
                    action_num = 5
            else:  # north or south
                if delta_y > 0:  # north
                    target_pos = sc_common.Point2D(
                        x=unit.pos.x, y=unit.pos.y + self._move_amount
                    )
                    action_num = 2
                else:  # south
                    target_pos = sc_common.Point2D(
                        x=unit.pos.x, y=unit.pos.y - self._move_amount
                    )
                    action_num = 3

            cmd = r_pb.ActionRawUnitCommand(
                ability_id=actions["move"],
                target_world_space_pos=target_pos,
                unit_tags=[tag],
                queue_command=False,
            )
        else:
            # Attack/heal the target
            cmd = r_pb.ActionRawUnitCommand(
                ability_id=action_id,
                target_unit_tag=target_tag,
                unit_tags=[tag],
                queue_command=False,
            )

        sc_action = sc_pb.Action(action_raw=r_pb.ActionRaw(unit_command=cmd))
        return sc_action, action_num

    def reward_battle(self):
        """Reward function when self.reward_spare==False.
        Returns accumulative hit/shield point damage dealt to the enemy
        + reward_death_value per enemy unit killed, and, in case
        self.reward_only_positive == False, - (damage dealt to ally units
        + reward_death_value per ally unit killed) * self.reward_negative_scale
        """
        if self.reward_sparse:
            return 0

        neg_scale = self.reward_negative_scale
        
        rewards = [0, 0]
        delta_deaths = [0, 0]
        delta_ally = [0, 0]
        delta_enemy = [0, 0]

        # update deaths
        for p in range(self.players):
            for al_id, al_unit in self.agents_dict_list[p]['agents'].items():
                if not self.death_tracker_agents_list[p][al_id]:
                    # did not die so far
                    prev_health = (
                        self.previous_agents_dict_list[p]['agents'][al_id].health
                        + self.previous_agents_dict_list[p]['agents'][al_id].shield
                    )
                    if al_unit.health == 0:
                        # just died
                        self.death_tracker_agents_list[p][al_id] = 1
                        if not self.reward_only_positive:
                            delta_deaths[p] -= self.reward_death_value * neg_scale
                        delta_ally[p] += prev_health * neg_scale
                    else:
                        # still alive
                        delta_ally[p] += neg_scale * (
                            prev_health - al_unit.health - al_unit.shield
                        )

            for e_id, e_unit in self.agents_dict_list[p]['enemies'].items():
                if not self.death_tracker_agents_list[1-p][e_id]:
                    prev_health = (
                        self.previous_agents_dict_list[p]['enemies'][e_id].health
                        + self.previous_agents_dict_list[p]['enemies'][e_id].shield
                    )
                    if e_unit.health == 0:
                        self.death_tracker_agents_list[1-p][e_id] = 1
                        delta_deaths[p] += self.reward_death_value
                        delta_enemy[p] += prev_health
                    else:
                        delta_enemy[p] += prev_health - e_unit.health - e_unit.shield

            if self.reward_only_positive:
                rewards[p] = max(0, delta_enemy[p] + delta_deaths[p])  # shield regeneration
            else:
                rewards[p] = delta_enemy[p] + delta_deaths[p] - delta_ally[p]

        
        return rewards

    def get_total_actions(self, player_id):
        """Returns the total number of actions an agent could ever take."""
        return self.n_actions_list[player_id]

    @staticmethod
    def distance(x1, y1, x2, y2):
        """Distance between two points."""
        return math.hypot(x2 - x1, y2 - y1)

    def unit_shoot_range(self, agent_id):
        """Return the shooting range of an agent.

        The range is a constant; ``agent_id`` is accepted but not used.
        """
        shoot_range = 6
        return shoot_range

    def unit_sight_range(self, agent_id):
        """Return the sight range of an agent.

        The range is a constant; ``agent_id`` is accepted but not used.
        """
        sight_range = 9
        return sight_range

    def unit_max_cooldown(self, unit):
        """Returns the maximal cooldown for a unit."""
        switcher = {
            self.marine_id: 15,
            self.marauder_id: 25,
            self.medivac_id: 200,  # max energy
            self.stalker_id: 35,
            self.zealot_id: 22,
            self.colossus_id: 24,
            self.hydralisk_id: 10,
            self.zergling_id: 11,
            self.baneling_id: 1,
        }
        return switcher.get(unit.unit_type, 15)

    def save_replay(self):
        """Save a replay."""
        prefix = self.replay_prefix or self.map_name
        replay_dir = self.replay_dir or ""
        replay_path = self._run_config.save_replay(
            self._controllers[0].save_replay(),
            replay_dir=replay_dir,
            prefix=prefix,
        )
        print(replay_path)
        logging.info("Replay saved at: %s" % replay_path)

    def unit_max_shield(self, unit):
        """Returns maximal shield for a given unit."""
        if unit.unit_type == 74 or unit.unit_type == self.stalker_id:
            return 80  # Protoss's Stalker
        if unit.unit_type == 73 or unit.unit_type == self.zealot_id:
            return 50  # Protoss's Zaelot
        if unit.unit_type == 4 or unit.unit_type == self.colossus_id:
            return 150  # Protoss's Colossus
        if unit.unit_type == 80 or unit.unit_type == self.void_ray_id:
            return 100  # Protoss's Void Ray

    def can_move(self, unit, direction):
        """Tell whether ``unit`` can move in ``direction``.

        The probed point lies half of ``self._move_amount`` away from the
        unit; it is truncated to integer grid coordinates and must be inside
        the map bounds and have a truthy ``self.pathing_grid`` entry. Any
        direction other than NORTH, SOUTH or EAST is handled as WEST.
        """
        half_step = self._move_amount / 2
        offsets = {
            Direction.NORTH: (0, half_step),
            Direction.SOUTH: (0, -half_step),
            Direction.EAST: (half_step, 0),
        }
        dx, dy = offsets.get(direction, (-half_step, 0))
        x = int(unit.pos.x + dx)
        y = int(unit.pos.y + dy)

        return bool(self.check_bounds(x, y) and self.pathing_grid[x, y])

    def get_surrounding_points(self, unit, include_self=False):
        """Returns the surrounding points of the unit in 8 directions."""
        x = int(unit.pos.x)
        y = int(unit.pos.y)

        ma = self._move_amount

        points = [
            (x, y + 2 * ma),
            (x, y - 2 * ma),
            (x + 2 * ma, y),
            (x - 2 * ma, y),
            (x + ma, y + ma),
            (x - ma, y - ma),
            (x + ma, y - ma),
            (x - ma, y + ma),
        ]

        if include_self:
            points.append((x, y))

        return points

    def check_bounds(self, x, y):
        """Whether a point is within the map bounds."""
        return 0 <= x < self.map_x and 0 <= y < self.map_y

    def get_surrounding_pathing(self, unit):
        """Returns pathing values of the grid surrounding the given unit."""
        points = self.get_surrounding_points(unit, include_self=False)
        vals = [
            self.pathing_grid[x, y] if self.check_bounds(x, y) else 1
            for x, y in points
        ]
        return vals

    def get_surrounding_height(self, unit):
        """Returns height values of the grid surrounding the given unit."""
        points = self.get_surrounding_points(unit, include_self=True)
        vals = [
            self.terrain_height[x, y] if self.check_bounds(x, y) else 1
            for x, y in points
        ]
        return vals

    def get_obs_agent(self, agent_id, player_id):
        """Returns observation for agent_id. The observation is composed of:

        - agent movement features (where it can move to, height information
            and pathing grid)
        - enemy features (available_to_attack, health, relative_x, relative_y,
            shield, unit_type)
        - ally features (visible, distance, relative_x, relative_y, shield,
            unit_type)
        - agent unit features (health, shield, unit_type)

        All of this information is flattened and concatenated into a list,
        in the aforementioned order. To know the sizes of each of the
        features inside the final list of features, take a look at the
        functions ``get_obs_move_feats_size()``,
        ``get_obs_enemy_feats_size()``, ``get_obs_ally_feats_size()`` and
        ``get_obs_own_feats_size()``.

        The size of the observation vector may vary, depending on the
        environment configuration and type of units present in the map.
        For instance, non-Protoss units will not have shields, movement
        features may or may not include terrain height and pathing grid,
        unit_type is not included if there is only one type of unit in the
        map etc.).

        NOTE: Agents should have access only to their local observations
        during decentralised execution.
        """
        unit = self.get_unit_by_id(agent_id, player_id)

        move_feats_dim = self.get_obs_move_feats_size()
        enemy_feats_dim = self.get_obs_feats_size(1-player_id, ally=False)
        ally_feats_dim = self.get_obs_feats_size(player_id, ally=True)
        own_feats_dim = self.get_obs_own_feats_size(player_id)

        move_feats = np.zeros(move_feats_dim, dtype=np.float32)
        enemy_feats = np.zeros(enemy_feats_dim, dtype=np.float32)
        ally_feats = np.zeros(ally_feats_dim, dtype=np.float32)
        own_feats = np.zeros(own_feats_dim, dtype=np.float32)

        if unit.health > 0:  # otherwise dead, return all zeros
            x = unit.pos.x
            y = unit.pos.y
            sight_range = self.unit_sight_range(agent_id)

            # Movement features
            avail_actions = self.get_avail_agent_actions(agent_id, player_id)
            for m in range(self.n_actions_move):
                move_feats[m] = avail_actions[m + 2]

            ind = self.n_actions_move

            if self.obs_pathing_grid:
                move_feats[
                    ind : ind + self.n_obs_pathing  # noqa
                ] = self.get_surrounding_pathing(unit)
                ind += self.n_obs_pathing

            if self.obs_terrain_height:
                move_feats[ind:] = self.get_surrounding_height(unit)

            # Enemy features
            for e_id, e_unit in self.agents_dict_list[player_id]['enemies'].items():
                e_x = e_unit.pos.x
                e_y = e_unit.pos.y
                dist = self.distance(x, y, e_x, e_y)

                if (
                    dist < sight_range and e_unit.health > 0
                ):  # visible and alive
                    # Sight range > shoot range
                    enemy_feats[e_id, 0] = avail_actions[
                        self.n_actions_no_attack + e_id
                    ]  # available
                    enemy_feats[e_id, 1] = dist / sight_range  # distance
                    enemy_feats[e_id, 2] = (
                        e_x - x
                    ) / sight_range  # relative X
                    enemy_feats[e_id, 3] = (
                        e_y - y
                    ) / sight_range  # relative Y

                    ind = 4
                    if self.obs_all_health:
                        enemy_feats[e_id, ind] = (
                            e_unit.health / e_unit.health_max
                        )  # health
                        ind += 1
                        if self.shield_bits_agents_list[1-player_id] > 0:
                        # if self.shield_bits_enemy > 0:
                            max_shield = self.unit_max_shield(e_unit)
                            enemy_feats[e_id, ind] = (
                                e_unit.shield / max_shield
                            )  # shield
                            ind += 1

                    if self.unit_type_bits > 0:
                        type_id = self.get_unit_type_id(e_unit, player_id==1)
                        enemy_feats[e_id, ind + type_id] = 1  # unit type

            # Ally features
            al_ids = [
                al_id for al_id in range(self.n_agents_list[player_id]) if al_id != agent_id
            ]
            for i, al_id in enumerate(al_ids):

                al_unit = self.get_unit_by_id(al_id, player_id)
                al_x = al_unit.pos.x
                al_y = al_unit.pos.y
                dist = self.distance(x, y, al_x, al_y)

                if (
                    dist < sight_range and al_unit.health > 0
                ):  # visible and alive
                    ally_feats[i, 0] = 1  # visible
                    ally_feats[i, 1] = dist / sight_range  # distance
                    ally_feats[i, 2] = (al_x - x) / sight_range  # relative X
                    ally_feats[i, 3] = (al_y - y) / sight_range  # relative Y

                    ind = 4
                    if self.obs_all_health:
                        ally_feats[i, ind] = (
                            al_unit.health / al_unit.health_max
                        )  # health
                        ind += 1
                        if self.shield_bits_agents_list[player_id] > 0:
                            max_shield = self.unit_max_shield(al_unit)
                            ally_feats[i, ind] = (
                                al_unit.shield / max_shield
                            )  # shield
                            ind += 1

                    if self.unit_type_bits > 0:
                        type_id = self.get_unit_type_id(al_unit, player_id==0)
                        ally_feats[i, ind + type_id] = 1
                        ind += self.unit_type_bits

                    if self.obs_last_action:
                        ally_feats[i, ind:] = self.last_action_list[player_id][al_id]

            # Own features
            ind = 0
            if self.obs_own_health:
                own_feats[ind] = unit.health / unit.health_max
                ind += 1
                if self.shield_bits_agents_list[player_id] > 0:
                    max_shield = self.unit_max_shield(unit)
                    own_feats[ind] = unit.shield / max_shield
                    ind += 1

            if self.unit_type_bits > 0:
                type_id = self.get_unit_type_id(unit, player_id==0)
                own_feats[ind + type_id] = 1

        agent_obs = np.concatenate(
            (
                move_feats.flatten(),
                enemy_feats.flatten(),
                ally_feats.flatten(),
                own_feats.flatten(),
            )
        )

        if self.obs_timestep_number:
            agent_obs = np.append(
                agent_obs, self._episode_steps / self.episode_limit
            )

        if self.debug:
            logging.debug("Obs Agent: {}".format(agent_id).center(60, "-"))
            logging.debug(
                "Avail. actions {}".format(
                    self.get_avail_agent_actions(agent_id)
                )
            )
            logging.debug("Move feats {}".format(move_feats))
            logging.debug("Enemy feats {}".format(enemy_feats))
            logging.debug("Ally feats {}".format(ally_feats))
            logging.debug("Own feats {}".format(own_feats))

        return agent_obs

    def get_obs(self, player_id = 0):
        """Returns all agent observations in a list.
        NOTE: Agents should have access only to their local observations
        during decentralised execution.
        """
        agents_obs = [self.get_obs_agent(i, player_id) for i in range(self.n_agents_list[player_id])]
        return agents_obs

    def get_state(self, player_id=0):
        """Returns the global state.
        NOTE: This functon should not be used during decentralised execution.
        """
        if self.obs_instead_of_state:
            obs_concat = np.concatenate(self.get_obs(player_id), axis=0).astype(
                np.float32
            )
            return obs_concat

        state_dict = self.get_state_dict(player_id)

        state = np.append(
            state_dict["allies"].flatten(), state_dict["enemies"].flatten()
        )
        if "last_action" in state_dict:
            state = np.append(state, state_dict["last_action"].flatten())
        if "timestep" in state_dict:
            state = np.append(state, state_dict["timestep"])

        state = state.astype(dtype=np.float32)

        if self.debug:
            logging.debug("STATE".center(60, "-"))
            logging.debug("Ally state {}".format(state_dict["allies"]))
            logging.debug("Enemy state {}".format(state_dict["enemies"]))
            if self.state_last_action:
                logging.debug("Last actions {}".format(self.last_action_list[player_id]))

        return state

    def get_agent_num_attributes(self, player_id):
        return len(self.agents_state_attr_names_list[player_id])

    def get_state_dict(self, player_id):
        """Returns the global state as a dictionary.

        - allies: numpy array containing agents and their attributes
        - enemies: numpy array containing enemies and their attributes
        - last_action: numpy array of previous actions for each agent
        - timestep: current no. of steps divided by total no. of steps

        NOTE: This function should not be used during decentralised execution.
        """

        # number of features equals the number of attribute names
        nf_al = self.get_agent_num_attributes(player_id)
        nf_en = self.get_agent_num_attributes(1-player_id) - 1

        ally_state = np.zeros((self.n_agents_list[player_id], nf_al))
        enemy_state = np.zeros((self.n_agents_list[1-player_id], nf_en))

        center_x = self.map_x / 2
        center_y = self.map_y / 2

        for al_id, al_unit in self.agents_dict_list[player_id]['agents'].items():
            if al_unit.health > 0:
                x = al_unit.pos.x
                y = al_unit.pos.y
                max_cd = self.unit_max_cooldown(al_unit)

                ally_state[al_id, 0] = (
                    al_unit.health / al_unit.health_max
                )  # health
                if (
                    self.map_type == "MMM"
                    and al_unit.unit_type == self.medivac_id
                ):
                    ally_state[al_id, 1] = al_unit.energy / max_cd  # energy
                else:
                    ally_state[al_id, 1] = (
                        al_unit.weapon_cooldown / max_cd
                    )  # cooldown
                ally_state[al_id, 2] = (
                    x - center_x
                ) / self.max_distance_x  # relative X
                ally_state[al_id, 3] = (
                    y - center_y
                ) / self.max_distance_y  # relative Y

                if self.shield_bits_agents_list[player_id] > 0:
                    max_shield = self.unit_max_shield(al_unit)
                    ally_state[al_id, 4] = (
                        al_unit.shield / max_shield
                    )  # shield

                if self.unit_type_bits > 0:
                    type_id = self.get_unit_type_id(al_unit, player_id==0)
                    ally_state[al_id, type_id - self.unit_type_bits] = 1

        for e_id, e_unit in self.agents_dict_list[player_id]['enemies'].items():
            if e_unit.health > 0:
                x = e_unit.pos.x
                y = e_unit.pos.y

                enemy_state[e_id, 0] = (
                    e_unit.health / e_unit.health_max
                )  # health
                enemy_state[e_id, 1] = (
                    x - center_x
                ) / self.max_distance_x  # relative X
                enemy_state[e_id, 2] = (
                    y - center_y
                ) / self.max_distance_y  # relative Y

                if self.shield_bits_agents_list[1-player_id] > 0:
                    max_shield = self.unit_max_shield(e_unit)
                    enemy_state[e_id, 3] = e_unit.shield / max_shield  # shield

                if self.unit_type_bits > 0:
                    type_id = self.get_unit_type_id(e_unit, player_id==1)
                    enemy_state[e_id, type_id - self.unit_type_bits] = 1

        state = {"allies": ally_state, "enemies": enemy_state}

        if self.state_last_action:
            state["last_action"] = self.last_action_list[player_id]
        if self.state_timestep_number:
            state["timestep"] = self._episode_steps / self.episode_limit

        return state
    
    def get_obs_feats_size(self, player_id, ally):
        """Returns the dimensions of the matrix containing ally features.
        Size is n_allies x n_features.
        """
        nf_al = 4 + self.unit_type_bits

        if self.obs_all_health:
            nf_al += 1 + self.shield_bits_agents_list[player_id]
        if not ally:
            return self.n_agents_list[player_id], nf_al
            
        if self.obs_last_action:
            nf_al += self.n_actions_list[player_id]

        return self.n_agents_list[player_id] - 1, nf_al

    def get_obs_own_feats_size(self, player_id):
        """
        Returns the size of the vector containing the agents' own features.
        """
        own_feats = self.unit_type_bits
        if self.obs_own_health:
            own_feats += 1 + self.shield_bits_agents_list[player_id]
        if self.obs_timestep_number:
            own_feats += 1

        return own_feats

    def get_obs_move_feats_size(self):
        """Returns the size of the vector containing the agents's movement-
        related features.
        """
        move_feats = self.n_actions_move
        if self.obs_pathing_grid:
            move_feats += self.n_obs_pathing
        if self.obs_terrain_height:
            move_feats += self.n_obs_height

        return move_feats

    def get_obs_size(self, player_id):
        """Returns the size of the observation."""
        own_feats = self.get_obs_own_feats_size(player_id)
        move_feats = self.get_obs_move_feats_size()

        n_enemies, n_enemy_feats = self.get_obs_feats_size(1-player_id, ally=False)
        n_allies, n_ally_feats = self.get_obs_feats_size(player_id, ally=True)

        enemy_feats = n_enemies * n_enemy_feats
        ally_feats = n_allies * n_ally_feats

        return move_feats + enemy_feats + ally_feats + own_feats

    def get_state_size(self, player_id):
        """Returns the size of the global state."""
        if self.obs_instead_of_state:
            return self.get_obs_size() * self.n_agents_list[player_id]

        nf_al = 4 + self.shield_bits_agents_list[player_id] + self.unit_type_bits
        nf_en = 3 + self.shield_bits_agents_list[1-player_id] + self.unit_type_bits

        enemy_state = self.n_agents_list[1-player_id] * nf_en
        ally_state = self.n_agents_list[player_id] * nf_al

        size = enemy_state + ally_state

        if self.state_last_action:
            size += self.n_agents_list[player_id] * self.n_actions_list[player_id]
        if self.state_timestep_number:
            size += 1

        return size

    def get_visibility_matrix(self):
        """Returns a boolean numpy array of dimensions
        (n_agents, n_agents + n_enemies) indicating which units
        are visible to each agent.
        """
        arr = np.zeros(
            (self.n_agents, self.n_agents + self.n_enemies),
            dtype=bool,
        )

        for agent_id in range(self.n_agents):
            current_agent = self.get_unit_by_id(agent_id)
            if current_agent.health > 0:  # it agent not dead
                x = current_agent.pos.x
                y = current_agent.pos.y
                sight_range = self.unit_sight_range(agent_id)

                # Enemies
                for e_id, e_unit in self.enemies.items():
                    e_x = e_unit.pos.x
                    e_y = e_unit.pos.y
                    dist = self.distance(x, y, e_x, e_y)

                    if dist < sight_range and e_unit.health > 0:
                        # visible and alive
                        arr[agent_id, self.n_agents + e_id] = 1

                # The matrix for allies is filled symmetrically
                al_ids = [
                    al_id for al_id in range(self.n_agents) if al_id > agent_id
                ]
                for _, al_id in enumerate(al_ids):
                    al_unit = self.get_unit_by_id(al_id)
                    al_x = al_unit.pos.x
                    al_y = al_unit.pos.y
                    dist = self.distance(x, y, al_x, al_y)

                    if dist < sight_range and al_unit.health > 0:
                        # visible and alive
                        arr[agent_id, al_id] = arr[al_id, agent_id] = 1

        return arr

    def get_unit_type_id(self, unit, new):
        """Returns the ID of unit type in the given scenario."""

        if new:  # use new SC2 unit types
            type_id = unit.unit_type - self._min_unit_type
        else:  # use default SC2 unit types
            if self.map_type == "stalkers_and_zealots":
                # id(Stalker) = 74, id(Zealot) = 73
                # Notice that one-hot zealot unit type in enemy_obs will be [1, 0] but [0, 1] in ally_obs
                # If you want to align the enemy unit type with the ally's, uncomment the following lines
                # if unit.unit_type == 74:
                #     type_id = 0
                # else:
                #     type_id = 1
                type_id = unit.unit_type - 73
            elif self.map_type == "colossi_stalkers_zealots":
                # id(Stalker) = 74, id(Zealot) = 73, id(Colossus) = 4
                if unit.unit_type == 4:
                    type_id = 0
                elif unit.unit_type == 74:
                    type_id = 1
                else:
                    type_id = 2
            elif self.map_type == "bane":
                # id(Baneling) = 9
                if unit.unit_type == 9:
                    type_id = 0
                else:
                    type_id = 1
            elif self.map_type == "MMM":
                # id(Marauder) = 51, id(Marine) = 48, id(Medivac) = 54
                if unit.unit_type == 51:
                    type_id = 0
                elif unit.unit_type == 48:
                    type_id = 1
                else:
                    type_id = 2
            elif self.map_type == 'void_ray':
                if unit.unit_type == 80:
                    type_id = 0
            elif self.map_type == 'hellion':
                if unit.unit_type == 53:
                    type_id = 0
        return type_id

    def get_avail_agent_actions(self, agent_id, player_id=0):
        """Returns the available actions for agent_id."""
        unit = self.get_unit_by_id(agent_id, player_id)
        if unit.health > 0:
            # cannot choose no-op when alive
            avail_actions = [0] * (self.n_actions_list[player_id]+1)

            # stop should be allowed
            avail_actions[1] = 1

            # see if we can move
            if self.can_move(unit, Direction.NORTH):
                avail_actions[2] = 1
            if self.can_move(unit, Direction.SOUTH):
                avail_actions[3] = 1
            if self.can_move(unit, Direction.EAST):
                avail_actions[4] = 1
            if self.can_move(unit, Direction.WEST):
                avail_actions[5] = 1

            # Can attack only alive units that are alive in the shooting range
            shoot_range = self.unit_shoot_range(agent_id)

            target_items = self.agents_dict_list[player_id]['enemies'].items()
            if self.map_type == "MMM" and unit.unit_type == self.medivac_id:
                # Medivacs cannot heal themselves or other flying units
                target_items = [
                    (t_id, t_unit)
                    for (t_id, t_unit) in self.agents_dict_list[player_id]['agents'].items()
                    if t_unit.unit_type != self.medivac_id
                ]

            for t_id, t_unit in target_items:
                if t_unit.health > 0:
                    dist = self.distance(
                        unit.pos.x, unit.pos.y, t_unit.pos.x, t_unit.pos.y
                    )
                    if dist <= shoot_range:
                        avail_actions[t_id + self.n_actions_no_attack] = 1
            avail_actions[-1]=1
            return avail_actions

        else:
            # only no-op allowed
            return [1] + [0] * (self.n_actions_list[player_id] - 1)

    def get_avail_actions(self, player_id=0):
        """Returns the available actions of all agents in a list."""
        avail_actions = []
        for agent_id in range(self.n_agents_list[player_id]):
            avail_agent = self.get_avail_agent_actions(agent_id, player_id)
            avail_actions.append(avail_agent)
        return avail_actions

    def close(self):
        """Close StarCraft II.

        Closes the renderer (if one was created) and drops the reference to
        it, then closes every truthy entry of ``self._sc2_procs``.
        """
        active_renderer = self.renderer
        if active_renderer is not None:
            active_renderer.close()
            self.renderer = None

        # Falsy entries (e.g. ``None`` placeholders) are skipped.
        for proc in filter(None, self._sc2_procs):
            proc.close()

    def seed(self):
        """Returns the random seed used by the environment.

        This is a read-only accessor: it returns ``self._seed`` unchanged and
        does not re-seed anything.
        """
        return self._seed

    def render(self, mode="human"):
        """Render the current state through the environment's renderer.

        The renderer is created on the first call, with the mode given then.
        Every later call must use that same mode.

        Args:
            mode: Render mode forwarded to the renderer (default "human").

        Returns:
            Whatever the renderer's ``render(mode)`` returns.

        Raises:
            AssertionError: If ``mode`` differs from the renderer's mode.
        """
        renderer = self.renderer
        if renderer is None:
            # Imported lazily, only when rendering is actually requested.
            from .render import StarCraft2Renderer

            renderer = StarCraft2Renderer(self, mode)
            self.renderer = renderer

        assert mode == renderer.mode, "mode must be consistent across render calls"
        return renderer.render(mode)

    def _kill_all_units(self):
        """Kill all units on the map.

        Gathers the tags of every unit with positive health from the
        'agents' rosters of player 0 and player 1 (in that order) and sends
        one debug kill command for all of them.
        """
        live_tags = [
            unit.tag
            for side in (0, 1)
            for unit in self.agents_dict_list[side]['agents'].values()
            if unit.health > 0
        ]
        kill_command = d_pb.DebugCommand(
            kill_unit=d_pb.DebugKillUnit(tag=live_tags)
        )
        # Only the first controller issues the command; it carries the tags
        # of both players' units.
        self._controllers[0].debug([kill_command])

    def init_units(self):
        """Initialise the units.

        Rebuilds ``self.agents_dict_list`` (one ``{'agents', 'enemies'}``
        dict per player) from the current observations in
        ``self._obs_list`` and refreshes ``self._unit_types``. The method
        loops until both players' 'agents' rosters have the sizes given in
        ``self.n_agents_list``; between attempts it steps every controller
        by one game step and re-observes.

        During the first episode (``self._episode_count == 0``) it also
        accumulates ``self.max_reward_list`` and calls
        ``_init_ally_unit_types``.

        NOTE(review): the completeness check and the re-observation index
        players 0 and 1 explicitly, so this appears to assume exactly two
        players -- confirm.
        """
        while True:
            # Sometimes not all units have yet been created by SC2
            self.agents_dict_list = [{'agents':  {}, 'enemies': {}} for _ in range(self.players)]

            # For player i: element 0 holds the units owned by that player
            # (owner == 1 + i), element 1 the units with owner == 2 - i
            # (the other side when i is 0 or 1).
            units_list = [[
                [unit for unit in self._obs_list[i].observation.raw_data.units
                if unit.owner == 1+i],
                [unit for unit in self._obs_list[i].observation.raw_data.units
                if unit.owner == 2-i],
            ] for i in range(self.players)]
            # Sort each group by (unit_type, x, y) so the ordering does not
            # depend on the order of units in the observation.
            units_sorted_list = [[
                sorted(
                    units,
                    key=attrgetter("unit_type", "pos.x", "pos.y"),
                    reverse=False,
                ) for units in obs_units_list
            ] for obs_units_list in units_list]

            # A player's own units get their agent ids from the sorted order.
            for p in range(self.players):
                for i, unit in enumerate(units_sorted_list[p][0]):
                    self.agents_dict_list[p]['agents'][i] = unit
                    if self.debug:
                        logging.debug(
                            "Unit {} is {}, x = {}, y = {}".format(
                                i+1,
                                self.agents_dict_list[p]['agents'][i].unit_type,
                                self.agents_dict_list[p]['agents'][i].pos.x,
                                self.agents_dict_list[p]['agents'][i].pos.y,
                            )
                        )


            # Enemy ids follow the observation order: this reads the
            # unsorted ``units_list``, not ``units_sorted_list``.
            for p in range(self.players):
                for i, unit in enumerate(units_list[p][1]):
                    self.agents_dict_list[p]['enemies'][i] = unit
                    if self._episode_count == 0:
                        # NOTE(review): this runs on every pass of the retry
                        # loop during the first episode, so the total may be
                        # added more than once if a retry happens -- confirm.
                        self.max_reward_list[p] += unit.health_max + unit.shield_max

            if self._episode_count == 0:
                # The smallest unit type among player 0's units is the base
                # from which the per-map unit-type ids are derived.
                # min() raises ValueError if player 0 has no units yet.
                min_unit_type = min(
                    unit.unit_type for unit in self.agents_dict_list[0]['agents'].values()
                )
                self._init_ally_unit_types(min_unit_type)

            # Only the sizes of the two 'agents' rosters are checked.
            all_created = (len(self.agents_dict_list[0]['agents']) == self.n_agents_list[0] and len(self.agents_dict_list[1]['agents']) == self.n_agents_list[1])

            # Unit types as seen by player 0: own units first, then the
            # other side, each in sorted order.
            self._unit_types = []
            for units_sorted in units_sorted_list[0]:
                self._unit_types += [unit.unit_type for unit in units_sorted]

            if all_created:  # all good
                return

            try:
                # Advance every controller by one game step, then fetch
                # fresh observations for both players before retrying.
                self.parallel.run((c.step, 1) for c in self._controllers)
                obs = self.parallel.run(c.observe for c in self._controllers)
                self._obs_list = [obs[0], obs[1]]

            except (protocol.ProtocolError, protocol.ConnectionError):
                # On a protocol/connection failure: restart and reset, after
                # which the while loop runs again.
                self.full_restart()
                self.reset()

    def get_unit_types(self):
        """Return the unit types recorded by the last ``init_units`` call.

        Returns:
            The value of ``self._unit_types``. If it is still ``None`` a
            warning is emitted and ``None`` is returned.
        """
        if self._unit_types is None:
            # Adjacent literals are concatenated, so the first one must end
            # with a space to keep "call" and "env.reset()" apart.
            warn(
                "unit types have not been initialized yet, please call "
                "env.reset() to populate this and call the method again."
            )

        return self._unit_types

    def update_units(self):
        """Refresh every tracked unit after an environment step.

        Assumes ``self._obs_list`` already holds the latest observation of
        each player. The pre-step rosters are copied into
        ``self.previous_agents_dict_list`` first. Each tracked unit is then
        replaced by the observed unit with the same tag; a unit that no
        longer appears in the observation has its health set to 0.

        Returns:
            ``[-1, 1]`` if player 0 lost, ``[1, -1]`` if player 0 won,
            ``[0, 0]`` if neither side has a unit left, and ``None`` while
            the battle is still undecided.
        """
        # Snapshot the previous state before anything is overwritten.
        self.previous_agents_dict_list = deepcopy(self.agents_dict_list)

        alive_counts = [0, 0]
        for p in range(self.players):
            own_units = self.agents_dict_list[p]['agents']
            for al_id, stale in own_units.items():
                # First observed unit carrying the same tag, if any.
                fresh = next(
                    (
                        seen
                        for seen in self._obs_list[p].observation.raw_data.units
                        if stale.tag == seen.tag
                    ),
                    None,
                )
                if fresh is None:  # dead
                    stale.health = 0
                else:
                    own_units[al_id] = fresh
                    alive_counts[p] += 1

            opposing_units = self.agents_dict_list[p]['enemies']
            for e_id, stale in opposing_units.items():
                fresh = next(
                    (
                        seen
                        for seen in self._obs_list[p].observation.raw_data.units
                        if stale.tag == seen.tag
                    ),
                    None,
                )
                if fresh is None:  # dead
                    stale.health = 0
                else:
                    opposing_units[e_id] = fresh

        first_alive, second_alive = alive_counts

        # The medivac check is only evaluated when the head-count test fails.
        if (first_alive == 0 and second_alive > 0) or self.only_medivac_left(0):
            return [-1, 1]  # lost
        if (second_alive == 0 and first_alive > 0) or self.only_medivac_left(1):
            return [1, -1]  # won
        if first_alive == 0 and second_alive == 0:
            return [0, 0]

        return None

    def _init_ally_unit_types(self, min_unit_type):
        """Initialise ally unit types. Should be called once from the
        init_units function.
        """
        self._min_unit_type = min_unit_type
        if self.map_type == "marines":
            self.marine_id = min_unit_type
        elif self.map_type == "stalkers_and_zealots":
            self.stalker_id = min_unit_type
            self.zealot_id = min_unit_type + 1
        elif self.map_type == "colossi_stalkers_zealots":
            self.colossus_id = min_unit_type
            self.stalker_id = min_unit_type + 1
            self.zealot_id = min_unit_type + 2
        elif self.map_type == "MMM":
            self.marauder_id = min_unit_type
            self.marine_id = min_unit_type + 1
            self.medivac_id = min_unit_type + 2
            self.tank_id = min_unit_type + 3
        elif self.map_type == "zealots":
            self.zealot_id = min_unit_type
        elif self.map_type == "hydralisks":
            self.hydralisk_id = min_unit_type
        elif self.map_type == "stalkers":
            self.stalker_id = min_unit_type
        elif self.map_type == "colossus":
            self.colossus_id = min_unit_type
        elif self.map_type == "bane":
            self.baneling_id = min_unit_type
            self.zergling_id = min_unit_type + 1
        elif self.map_type == 'void_ray':
            self.void_ray_id = min_unit_type
        elif self.map_type == 'hellion':
            self.hellion_id = min_unit_type
        
    def only_medivac_left(self, player_id):
        """Check if only Medivac units are left for the given player.

        Always False unless ``self.map_type`` is "MMM".

        Args:
            player_id: Index of the player whose 'agents' roster is checked.

        Returns:
            For player 1: True when exactly one living unit whose type is
            not ``self.medivac_id`` remains and that unit's type is 54.
            For any other player: True when no such living unit remains.
        """
        if self.map_type != "MMM":
            return False

        # Living units whose type differs from ``self.medivac_id``.
        survivors = []
        for unit in self.agents_dict_list[player_id]['agents'].values():
            if unit.health > 0 and unit.unit_type != self.medivac_id:
                survivors.append(unit)

        # Open question from the original author: do only the blue side's
        # units include medivacs?
        if player_id == 1:
            # NOTE(review): 54 is presumably the stock SC2 Medivac unit type
            # id, which would differ from ``self.medivac_id`` -- confirm.
            return len(survivors) == 1 and survivors[0].unit_type == 54
        return len(survivors) == 0

    def get_unit_by_id(self, a_id, player_id):
        """Look up one of a player's own units by its agent id.

        Args:
            a_id: Key of the unit in the player's 'agents' roster.
            player_id: Index of the player owning the unit.

        Returns:
            The unit stored under ``a_id``.

        Raises:
            KeyError: If the roster is a dict without the key ``a_id``.
        """
        roster = self.agents_dict_list[player_id]['agents']
        return roster[a_id]

    def get_stats(self):
        """Summarise the battle statistics gathered so far.

        In 'single' mode the win figures refer to the first entry of
        ``self.battles_won``; in any other mode ``self.battles_won`` is
        used as a whole.

        Returns:
            A dict with the keys "battles_won", "battles_game",
            "battles_draw", "win_rate", "timeouts" and "restarts".
            "battles_draw" and "timeouts" both report ``self.timeouts``.

        NOTE(review): "win_rate" divides by ``self.battles_game`` without a
        guard, so a zero count would fail -- confirm callers only ask after
        a game has finished.
        """
        if self.mode == 'single':
            won = self.battles_won[0]
        else:
            won = self.battles_won

        return {
            "battles_won": won,
            "battles_game": self.battles_game,
            "battles_draw": self.timeouts,
            "win_rate": won / self.battles_game,
            "timeouts": self.timeouts,
            "restarts": self.force_restarts,
        }

    def get_env_info(self):
        """Return the parent's environment info extended with feature names.

        Each entry ``i`` of the parent's result gains "agent_features"
        (entry ``i`` of ``self.agents_state_attr_names_list``) and
        "enemy_features" (entry ``1 - i`` of the same list).

        Returns:
            Only the first entry in 'single' mode, otherwise the whole
            collection.
        """
        env_info = super().get_env_info()
        for i in range(len(env_info)):
            player_info = env_info[i]
            player_info["agent_features"] = self.agents_state_attr_names_list[i]
            player_info["enemy_features"] = self.agents_state_attr_names_list[1 - i]
        return env_info[0] if self.mode == 'single' else env_info