import wandb
import sys
import socket
import setproctitle
import numpy as np
import time
from pathlib import Path
import torch
from macpo.algorithms.utils.util import init, check
from matplotlib import pyplot as plt
from PIL import Image
from macpo.config import get_config
from macpo.envs.safety_ma_mujoco.safety_multiagent_mujoco import MujocoMulti
from macpo.envs.env_wrappers import ShareSubprocVecEnv, ShareDummyVecEnv
from macpo.algorithms.r_mappo.algorithm.MACPPOPolicy import MACPPOPolicy as Policy

def parse_args(args, parser):
    """Register the MuJoCo task options on ``parser`` and parse ``args``.

    Args:
        args: Sequence of command-line tokens (without the program name).
        parser: An ``argparse.ArgumentParser``; the options below are added
            to it in place.

    Returns:
        The namespace of recognised options. Tokens the parser does not
        know are silently ignored because ``parse_known_args`` is used.
    """
    # Task selection.
    parser.add_argument('--scenario', type=str, default='Hopper-v2', help="Which mujoco task to run on")
    parser.add_argument('--agent_conf', type=str, default='3x1')
    parser.add_argument('--agent_obsk', type=int, default=0)

    # Boolean switches as (flag, action, default), registered in this order.
    # Every switch is off by default and turned on by passing it, except
    # --use_mustalive, which is on by default and turned off by passing it.
    switches = (
        ("--add_move_state", 'store_true', False),
        ("--add_local_obs", 'store_true', False),
        ("--add_distance_state", 'store_true', False),
        ("--add_enemy_action_state", 'store_true', False),
        ("--add_agent_id", 'store_true', False),
        ("--add_visible_state", 'store_true', False),
        ("--add_xy_state", 'store_true', False),
        # agent-specific state should be designed carefully
        ("--use_state_agent", 'store_true', False),
        ("--use_mustalive", 'store_false', True),
        ("--add_center_xy", 'store_true', False),
        ("--use_single_network", 'store_true', False),
    )
    for flag, action, default in switches:
        parser.add_argument(flag, action=action, default=default)

    # Keep only the recognised options; leftover tokens are discarded.
    known_args, _leftover = parser.parse_known_args(args)
    return known_args


def main(args):
    """Load saved per-agent actors and render one episode with them.

    The run configuration (algorithm name, scenario, agent layout,
    observation depth and episode limit) is pinned below and overrides
    whatever was supplied on the command line. One policy is built per
    agent on the CPU, its actor weights are read from
    ``<model_dir>/actor_agent<i>.pt``, and the environment is stepped with
    deterministic actions and rendered until ``dones[0]`` is truthy.

    Args:
        args: Command-line tokens (without the program name).

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise when a
        checkpoint is missing or does not match the actor.
    """
    parser = get_config()
    all_args = parse_args(args, parser)

    # Hard-wired run configuration; these assignments win over CLI values.
    all_args.algorithm_name = 'macpo'
    all_args.agent_conf = '2x4'
    all_args.experiment_name = '2-4'
    all_args.scenario = 'Ant-v2'
    all_args.agent_obsk = 1
    all_args.episode_limit = 1000

    env_args = {"scenario": all_args.scenario,
                "agent_conf": all_args.agent_conf,
                "agent_obsk": all_args.agent_obsk,
                "episode_limit": all_args.episode_limit}
    env = MujocoMulti(env_args=env_args)
    obs, _share_obs, _info = env.reset()
    env_info = env.get_env_info()
    print(env_info, env.observation_space)
    n_agents = env_info["n_agents"]

    # Observations are converted to float32 CPU tensors before inference.
    tpdv = dict(dtype=torch.float32, device='cpu')

    # One policy network per agent, all on the CPU.
    policy = [
        Policy(all_args,
               env.observation_space[agent_id],
               env.share_observation_space[agent_id],
               env.action_space[agent_id],
               device='cpu')
        for agent_id in range(n_agents)
    ]

    # Model location. Uses ``all_args.model_dir`` when the parsed config
    # provides a non-empty one (NOTE(review): assumes get_config may define
    # --model_dir; confirm), otherwise the original empty placeholder.
    model_dir = getattr(all_args, 'model_dir', None) or ''
    for agent_id, agent_policy in enumerate(policy):
        actor_path = str(model_dir) + '/actor_agent' + str(agent_id) + '.pt'
        # map_location='cpu' lets checkpoints saved from a CUDA device load
        # on this CPU-only setup.
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from a trusted source.
        policy_actor_state_dict = torch.load(actor_path, map_location='cpu')
        agent_policy.actor.load_state_dict(policy_actor_state_dict)

    cnt = 0
    while True:
        actions = []
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            for agent_id in range(n_agents):
                obsi = check(obs[agent_id]).to(**tpdv)
                actor_features = policy[agent_id].actor.base(obsi)
                available_actions = None
                action, _action_log_probs = policy[agent_id].actor.act(
                    actor_features, available_actions, deterministic=True)
                actions.append(action.detach().cpu().numpy())
        cnt += 1
        obs, _share_obs, rewards, dones, _infos, _avail = env.step(actions)
        print(obs, dones, rewards, cnt)
        env.render(mode='human')
        time.sleep(0.01)  # short pause between rendered frames
        if dones[0]:
            break


if __name__ == "__main__":
    # Script entry point: hand the command-line tokens (program name
    # stripped) to main().
    cli_args = sys.argv[1:]
    main(cli_args)

