import os
import torch
import numpy as np
from BCAgent import BCAgent
from Overcooked_Env_new import Overcooked_NEW

# Parameters
# Configuration table passed to BCAgent(...) when the agents are constructed.
# 'obs_dim' is left as None here; nothing in this script fills it in.
params = {
    'env': 'mo1',
    'lr': 0.001,
    'gamma': 0.95,
    'obs_dim': None,
    'action_dim': 6,
    'hidden_dim': 64,
}

if __name__ == '__main__':
    # Evaluation entry point: load two trained BCAgent checkpoints and roll
    # them out for a single episode in the Overcooked environment, printing
    # every positive reward and the episode total.

    # Directory holding the saved agent checkpoints; also handed to the
    # environment as its run directory.
    path = r"./results/models/Many_orders_place_onion_in_pot1/buffer/bc__2023-09-13_10-47-54/"

    # Environment
    env_name = r'mo1'
    env = Overcooked_NEW(env_name, seed=1, featurize_type=("ppo", "ppo"))
    env.use_render = True
    env.run_dir = path
    both_agents_ob, share_obs, available_actions = env.reset()
    both_agents_ob = np.stack(both_agents_ob)

    # Use the GPU when one is present (the original behaviour) but fall back
    # to the CPU instead of crashing on hosts without CUDA.
    # NOTE(review): BCAgent.act must itself be device-agnostic for the CPU
    # fallback to work end-to-end -- confirm against BCAgent.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    agent_0 = BCAgent(params).to(device)
    agent_1 = BCAgent(params).to(device)
    # map_location remaps the stored tensors onto `device`, so checkpoints
    # saved on a GPU can still be loaded on a CPU-only machine.
    agent_0.load_state_dict(
        torch.load(os.path.join(path, r"agent_0_10000.th"), map_location=device)
    )
    agent_1.load_state_dict(
        torch.load(os.path.join(path, r"agent_1_10000.th"), map_location=device)
    )
    agent_0.eval()
    agent_1.eval()

    # Test rollout (one episode)
    episode_rewards = 0
    step = 0
    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        while True:
            obs = both_agents_ob
            action0 = agent_0.act(obs[0], True)
            action1 = agent_1.act(obs[1], True)
            actions = [[action0], [action1]]
            both_agents_ob, share_obs, reward, dones, info, available_actions = env.step(
                actions, test_mode=True
            )
            both_agents_ob = np.stack(both_agents_ob)

            # The script tracks the last entry of the reward structure.
            step_reward = reward[-1]
            episode_rewards += step_reward
            if step_reward > 0:
                print('now:', step, step_reward)

            # Stop as soon as either agent reports the episode as done.
            if dones[0] or dones[1]:
                print('episode_reward:', episode_rewards)
                break
            step += 1
    print("all process end!")

