import os
import torch.nn.functional as F
import os
import numpy as np
import torch
import copy
from baseline_policy.ai_policy.policy_prefrence import Policy
from envs.Overcooked_Env_new import Overcooked_NEW
from envs.Overcooked_Env import Overcooked
from replay_buffer import ReplayBuffer


def get_script_script_Distant_Tomato(env_name, save_path, test_num=1):
    """Record one script-vs-script episode and save it as a replay buffer.

    Player 0 (the "agent") and player 1 (the "human") are both driven by
    script policies inside an ``Overcooked_NEW`` environment. Every
    transition is added to a ``ReplayBuffer``; once either player's done
    flag equals ``True`` the buffer is saved and the function returns.

    Args:
        env_name: Layout name forwarded to ``Overcooked_NEW``.
        save_path: First argument forwarded to ``ReplayBuffer.save``.
        test_num: Suffix of the saved name, ``distant_tomato_<test_num>``.
    """
    from hsp.envs.overcooked_new.script_agent import SCRIPT_AGENTS
    from hsp.envs.overcooked_new.src.overcooked_ai_py.mdp.actions import Action, Direction

    # Load the environment: Overcooked_NEW
    env = Overcooked_NEW(env_name, seed=3, featurize_type=("ppo", "ppo"))

    # Skill library. The tuple order is the order in which the skills are
    # instantiated and reset for each player.
    skill_names = (
        # Fetch an onion and put it into pot 1 / pot 2 / any pot.
        'distant_tomato_place_onion_in_pot1',
        'distant_tomato_place_onion_in_pot2',
        'place_onion_in_pot',
        # Fetch a tomato and put it into pot 1 / pot 2 / any pot.
        'distant_tomato_place_tomato_in_pot1',
        'distant_tomato_place_tomato_in_pot2',
        'place_tomato_in_pot',
        # Deliver the soup from pot 1 / pot 2 / any pot to the serving spot.
        'distant_tomato_deliver_soup_use_pot1',
        'distant_tomato_deliver_soup_use_pot2',
        'deliver_soup',
        # Both place onions in and deliver from pot 1 / pot 2 / any pot.
        'distant_tomato_place_onion_and_deliver_soup_usePot1',
        'distant_tomato_place_onion_and_deliver_soup_usePot2',
        'place_onion_and_deliver_soup',
        # Both place tomatoes in and deliver from pot 1 / pot 2 / any pot.
        'distant_tomato_place_tomato_and_deliver_soup_usePot1',
        'distant_tomato_place_tomato_and_deliver_soup_usePot2',
        'place_tomato_and_deliver_soup',
    )

    def build_skills(player_idx):
        """Instantiate every listed skill and reset it for ``player_idx``."""
        library = {}
        for skill_name in skill_names:
            policy = SCRIPT_AGENTS[skill_name]()
            policy.reset(env.base_mdp, env.base_env.state, player_idx)
            library[skill_name] = policy
        return library

    agent_skills = build_skills(0)   # agent skills (player 0)
    human_skills = build_skills(1)   # human skills (player 1)

    # The pairing recorded by this run; pick other keys to record another one.
    human_policy = human_skills['place_tomato_and_deliver_soup']
    ai_policy = agent_skills['distant_tomato_deliver_soup_use_pot2']

    # Initialise the ReplayBuffer
    replay_buffer = ReplayBuffer(obs_shape=[2, 7*5*26],
                                 action_shape=[2, 1],
                                 reward_shape=[2, 1],
                                 dones_shape=[2, 1],
                                 capacity=400,
                                 device='cuda')

    episode_return = 0
    step_count = 0
    first_obs, _, _ = env.reset()
    obs = np.stack(first_obs)
    episode_over = False
    while not episode_over:
        # The human policy is queried before the agent policy.
        human_move = Action.ALL_ACTIONS.index(
            human_policy.step(env.base_mdp, env.base_env.state, 1))
        ai_move = Action.ALL_ACTIONS.index(
            ai_policy.step(env.base_mdp, env.base_env.state, 0))
        prev_obs = copy.deepcopy(obs)

        stepped_obs, _share_obs, rewards, dones, _infos, _available = env.step(
            [[ai_move], [human_move]])
        next_obs = np.stack(stepped_obs)

        # Store the transition; the last reward entry is written for both players.
        team_reward = rewards[-1]
        replay_buffer.add(
            prev_obs.reshape(2, -1),
            np.array([ai_move, human_move]).reshape(2, -1),
            np.array([team_reward, team_reward]).reshape(2, -1),
            next_obs.reshape(2, -1),
            np.array(dones).reshape(2, -1),
        )

        episode_return += team_reward
        obs = copy.deepcopy(next_obs)
        if team_reward > 0:
            print('now:', step_count, team_reward)
        step_count += 1
        episode_over = dones[0] == True or dones[1] == True

    print('episode_reward:', episode_return)
    replay_buffer.save(save_path, 'distant_tomato' + '_' + str(test_num))

def get_script_script_ManyOrders(env_name, save_path, test_num=1):
    """Record one script-vs-script episode and save it as a replay buffer.

    Player 0 (the "agent") and player 1 (the "human") are both driven by
    script policies inside an ``Overcooked_NEW`` environment. Every
    transition is added to a ``ReplayBuffer``; once either player's done
    flag equals ``True`` the buffer is saved and the function returns.

    Args:
        env_name: Layout name forwarded to ``Overcooked_NEW``.
        save_path: First argument forwarded to ``ReplayBuffer.save``.
        test_num: Suffix of the saved name, ``manyOrders_<test_num>``.
    """
    from hsp.envs.overcooked_new.script_agent import SCRIPT_AGENTS
    from hsp.envs.overcooked_new.src.overcooked_ai_py.mdp.actions import Action, Direction

    # Load the environment: Overcooked_NEW
    env = Overcooked_NEW(env_name, seed=3, featurize_type=("ppo", "ppo"))

    # Skill library. The tuple order is the order in which the skills are
    # instantiated and reset for each player.
    skill_names = (
        # Fetch an onion and put it into pot 1 / pot 2 / pot 3 / any pot.
        'many_orders_place_onion_in_pot1',
        'many_orders_place_onion_in_pot2',
        'many_orders_place_onion_in_pot3',
        'place_onion_in_pot',
        # Fetch a tomato and put it into pot 1 / pot 2 / pot 3 / any pot.
        'many_orders_place_tomato_in_pot1',
        'many_orders_place_tomato_in_pot2',
        'many_orders_place_tomato_in_pot3',
        'place_tomato_in_pot',
        # Deliver the soup from pot 1 / pot 2 / pot 3 / any pot to the serving spot.
        'many_orders_deliver_soup_use_pot1',
        'many_orders_deliver_soup_use_pot2',
        'many_orders_deliver_soup_use_pot3',
        'deliver_soup',
        # Both place onions in and deliver from pot 1 / pot 2 / pot 3 / any pot.
        'many_orders_place_onion_and_deliver_soup_usePot1',
        'many_orders_place_onion_and_deliver_soup_usePot2',
        'many_orders_place_onion_and_deliver_soup_usePot3',
        'place_onion_and_deliver_soup',
        # Both place tomatoes in and deliver from pot 1 / pot 2 / pot 3 / any pot.
        'many_orders_place_tomato_and_deliver_soup_usePot1',
        'many_orders_place_tomato_and_deliver_soup_usePot2',
        'many_orders_place_tomato_and_deliver_soup_usePot3',
        'place_tomato_and_deliver_soup',
        # Fetch an onion and put it into pots 1-2 / pots 1-3 / pots 2-3.
        'many_orders_place_onion_in_pot12',
        'many_orders_place_onion_in_pot13',
        'many_orders_place_onion_in_pot23',
        # Fetch a tomato and put it into pots 1-2 / pots 1-3 / pots 2-3.
        'many_orders_place_tomato_in_pot12',
        'many_orders_place_tomato_in_pot13',
        'many_orders_place_tomato_in_pot23',
        # Both place onions in and deliver from pots 1-2 / pots 1-3 / pots 2-3.
        'many_orders_place_onion_and_deliver_soup_usePot12',
        'many_orders_place_onion_and_deliver_soup_usePot13',
        'many_orders_place_onion_and_deliver_soup_usePot23',
        # Both place tomatoes in and deliver from pots 1-2 / pots 1-3 / pots 2-3.
        'many_orders_place_tomato_and_deliver_soup_usePot12',
        'many_orders_place_tomato_and_deliver_soup_usePot13',
        'many_orders_place_tomato_and_deliver_soup_usePot23',
    )

    def build_skills(player_idx):
        """Instantiate every listed skill and reset it for ``player_idx``."""
        library = {}
        for skill_name in skill_names:
            policy = SCRIPT_AGENTS[skill_name]()
            policy.reset(env.base_mdp, env.base_env.state, player_idx)
            library[skill_name] = policy
        return library

    agent_skills = build_skills(0)   # agent skills (player 0)
    human_skills = build_skills(1)   # human skills (player 1)

    # The pairing recorded by this run; pick other keys to record another one.
    human_policy = human_skills['place_tomato_and_deliver_soup']
    ai_policy = agent_skills['many_orders_deliver_soup_use_pot1']

    # Initialise the ReplayBuffer
    replay_buffer = ReplayBuffer(obs_shape=[2, 5*5*26],
                                 action_shape=[2, 1],
                                 reward_shape=[2, 1],
                                 dones_shape=[2, 1],
                                 capacity=400,
                                 device='cuda')

    episode_return = 0
    step_count = 0
    first_obs, _, _ = env.reset()
    obs = np.stack(first_obs)
    episode_over = False
    while not episode_over:
        # The human policy is queried before the agent policy.
        human_move = Action.ALL_ACTIONS.index(
            human_policy.step(env.base_mdp, env.base_env.state, 1))
        ai_move = Action.ALL_ACTIONS.index(
            ai_policy.step(env.base_mdp, env.base_env.state, 0))
        prev_obs = copy.deepcopy(obs)

        stepped_obs, _share_obs, rewards, dones, _infos, _available = env.step(
            [[ai_move], [human_move]])
        next_obs = np.stack(stepped_obs)

        # Store the transition; the last reward entry is written for both players.
        team_reward = rewards[-1]
        replay_buffer.add(
            prev_obs.reshape(2, -1),
            np.array([ai_move, human_move]).reshape(2, -1),
            np.array([team_reward, team_reward]).reshape(2, -1),
            next_obs.reshape(2, -1),
            np.array(dones).reshape(2, -1),
        )

        episode_return += team_reward
        obs = copy.deepcopy(next_obs)
        if team_reward > 0:
            print('now:', step_count, team_reward)
        step_count += 1
        episode_over = dones[0] == True or dones[1] == True

    print('episode_reward:', episode_return)
    replay_buffer.save(save_path, 'manyOrders' + '_' + str(test_num))

def get_script_script_Random3(env_name, save_path, test_num=1):
    """Record one script-vs-script episode and save it as a replay buffer.

    Player 0 (the "agent") and player 1 (the "human") are both driven by
    script policies inside an ``Overcooked`` environment. Every transition
    is added to a ``ReplayBuffer``; once either player's done flag equals
    ``True`` the buffer is saved and the function returns.

    Args:
        env_name: Layout name forwarded to ``Overcooked``.
        save_path: First argument forwarded to ``ReplayBuffer.save``.
        test_num: Suffix of the saved name, ``random3_<test_num>``.
    """
    from hsp.envs.overcooked.script_agent import SCRIPT_AGENTS
    from hsp.envs.overcooked.overcooked_ai_py.mdp.actions import Action

    # Load the environment: Overcooked
    env = Overcooked(env_name, seed=3, featurize_type=("ppo", "ppo"))

    # Skill library. The tuple order is the order in which the skills are
    # instantiated and reset for player 0.
    skill_names = (
        # Fetch an onion and put it into pot 1 / pot 2 / any pot.
        'random3_place_onion_in_pot1',
        'random3_place_onion_in_pot2',
        'place_onion_in_pot',
        # Deliver the soup from pot 1 / pot 2 / any pot to the serving spot.
        'random3_deliver_soup_use_pot1',
        'random3_deliver_soup_use_pot2',
        'deliver_soup',
        # Both place onions in and deliver from pot 1 / pot 2 / any pot.
        'random3_place_onion_and_deliver_soup1',
        'random3_place_onion_and_deliver_soup2',
        'place_onion_and_deliver_soup',
        # Fetch an onion and put it on the middle counter.
        'random3_only_onion_to_middle',
    )

    agent_skills = {}
    for skill_name in skill_names:
        policy = SCRIPT_AGENTS[skill_name]()
        policy.reset(env.base_mdp, env.base_env.state, 0)
        agent_skills[skill_name] = policy

    # Player 1 gets its own instance, reset with its own index. Previously
    # the player-1 action was taken from an instance that had been reset for
    # player 0, unlike the sibling collectors, which keep one instance per
    # player slot.
    human_policy = SCRIPT_AGENTS['place_onion_and_deliver_soup']()
    human_policy.reset(env.base_mdp, env.base_env.state, 1)

    # The agent skill recorded by this run; pick another key to record another one.
    ai_policy = agent_skills['random3_deliver_soup_use_pot2']

    # Initialise the ReplayBuffer
    replay_buffer = ReplayBuffer(obs_shape=[2, 8*5*20],
                                 action_shape=[2, 1],
                                 reward_shape=[2, 1],
                                 dones_shape=[2, 1],
                                 capacity=400,
                                 device='cuda')

    episode_rewards = 0
    step = 0
    obs, _, _ = env.reset()
    obs = np.stack(obs)
    while True:
        # The human policy is queried before the agent policy.
        human_action = Action.ALL_ACTIONS.index(
            human_policy.step(env.base_mdp, env.base_env.state, 1))
        ai_action = Action.ALL_ACTIONS.index(
            ai_policy.step(env.base_mdp, env.base_env.state, 0))
        now_obs = copy.deepcopy(obs)

        next_obs, _share_obs, rewards, dones, _infos, _available = env.step(
            [[ai_action], [human_action]])
        next_obs = np.stack(next_obs)

        # Store the transition; the last reward entry is written for both players.
        team_reward = rewards[-1]
        replay_buffer.add(
            now_obs.reshape(2, -1),
            np.array([ai_action, human_action]).reshape(2, -1),
            np.array([team_reward, team_reward]).reshape(2, -1),
            next_obs.reshape(2, -1),
            np.array(dones).reshape(2, -1),
        )

        episode_rewards += team_reward
        obs = copy.deepcopy(next_obs)
        if team_reward > 0:
            print('now:', step, team_reward)
        step += 1
        if dones[0] == True or dones[1] == True:
            print('episode_reward:', episode_rewards)
            save_name = str(r"random3") + '_' + str(test_num)
            replay_buffer.save(save_path, save_name)
            break

def get_script_script_Soup_Coordination(env_name, save_path, test_num=1):
    """Record one script-vs-script episode and save it as a replay buffer.

    Player 0 (the "agent") and player 1 (the "human") are both driven by
    script policies inside an ``Overcooked_NEW`` environment. Every
    transition is added to a ``ReplayBuffer``; once either player's done
    flag equals ``True`` the buffer is saved and the function returns.

    Args:
        env_name: Layout name forwarded to ``Overcooked_NEW``.
        save_path: First argument forwarded to ``ReplayBuffer.save``.
        test_num: Suffix of the saved name, ``soupCoordination_<test_num>``.
    """
    from hsp.envs.overcooked_new.script_agent import SCRIPT_AGENTS
    from hsp.envs.overcooked_new.src.overcooked_ai_py.mdp.actions import Action, Direction

    # Load the environment: Overcooked_NEW
    env = Overcooked_NEW(env_name, seed=3, featurize_type=("ppo", "ppo"))

    # Skill library. The tuple order is the order in which the skills are
    # instantiated and reset for each player. The author's notes describe
    # each of these as acting on the first pot.
    skill_names = (
        'place_onion_in_pot',              # fetch an onion and put it into the pot
        'place_tomato_in_pot',             # fetch a tomato and put it into the pot
        'deliver_soup',                    # deliver the pot's soup to the serving spot
        'place_onion_and_deliver_soup',    # both place onions and deliver
        'place_tomato_and_deliver_soup',   # both place tomatoes and deliver
    )

    def build_skills(player_idx):
        """Instantiate every listed skill and reset it for ``player_idx``."""
        library = {}
        for skill_name in skill_names:
            policy = SCRIPT_AGENTS[skill_name]()
            policy.reset(env.base_mdp, env.base_env.state, player_idx)
            library[skill_name] = policy
        return library

    agent_skills = build_skills(0)   # agent skills (player 0)
    human_skills = build_skills(1)   # human skills (player 1)

    # The pairing recorded by this run; pick other keys to record another one.
    human_policy = human_skills['place_onion_and_deliver_soup']
    ai_policy = agent_skills['deliver_soup']

    # Initialise the ReplayBuffer
    replay_buffer = ReplayBuffer(obs_shape=[2, 11*5*26],
                                 action_shape=[2, 1],
                                 reward_shape=[2, 1],
                                 dones_shape=[2, 1],
                                 capacity=400,
                                 device='cuda')

    episode_return = 0
    step_count = 0
    first_obs, _, _ = env.reset()
    obs = np.stack(first_obs)
    episode_over = False
    while not episode_over:
        # The human policy is queried before the agent policy.
        human_move = Action.ALL_ACTIONS.index(
            human_policy.step(env.base_mdp, env.base_env.state, 1))
        ai_move = Action.ALL_ACTIONS.index(
            ai_policy.step(env.base_mdp, env.base_env.state, 0))
        prev_obs = copy.deepcopy(obs)

        stepped_obs, _share_obs, rewards, dones, _infos, _available = env.step(
            [[ai_move], [human_move]])
        next_obs = np.stack(stepped_obs)

        # Store the transition; the last reward entry is written for both players.
        team_reward = rewards[-1]
        replay_buffer.add(
            prev_obs.reshape(2, -1),
            np.array([ai_move, human_move]).reshape(2, -1),
            np.array([team_reward, team_reward]).reshape(2, -1),
            next_obs.reshape(2, -1),
            np.array(dones).reshape(2, -1),
        )

        episode_return += team_reward
        obs = copy.deepcopy(next_obs)
        if team_reward > 0:
            print('now:', step_count, team_reward)
        step_count += 1
        episode_over = dones[0] == True or dones[1] == True

    print('episode_reward:', episode_return)
    replay_buffer.save(save_path, 'soupCoordination' + '_' + str(test_num))
            
def get_script_script_Unident_S(env_name,save_path,test_num=1):
    """Roll out one scripted-vs-scripted episode and save its transitions.

    An ``Overcooked`` environment is created for ``env_name``, every scripted
    skill is instantiated and reset once for player slot 0 (the "agent") and
    once for player slot 1 (the "human"), and the environment is then stepped
    with one fixed skill per slot until either entry of ``dones`` is set.
    Each transition is added to a ``ReplayBuffer`` that is saved when the
    episode ends.

    Args:
        env_name: Name handed to ``Overcooked`` (the experiment layout).
        save_path: First argument forwarded to ``ReplayBuffer.save``.
        test_num: Suffix of the saved name, ``unident_s_<test_num>``.
    """
    from hsp.envs.overcooked.script_agent import SCRIPT_AGENTS
    from hsp.envs.overcooked.overcooked_ai_py.mdp.actions import Action, Direction

    # Load the environment: Overcooked
    env = Overcooked(env_name, seed=3, featurize_type=("ppo", "ppo"))

    def build_skill(script_key, player_idx):
        # Instantiate one scripted skill and reset it for the given player slot.
        skill = SCRIPT_AGENTS[script_key]()
        skill.reset(env.base_mdp, env.base_env.state, player_idx)
        return skill

    # Skill library shared by both slots (short name -> SCRIPT_AGENTS key).
    skill_keys = {
        # fetch an onion and put it into the first pot
        'place_pot1': 'aa_place_onion_in_pot1',
        # fetch an onion and put it into the second pot
        'place_pot2': 'aa_place_onion_in_pot2',
        # fetch an onion and put it into any pot
        'place_pot': 'place_onion_in_pot',
        # deliver the soup of the first pot to the serving location
        'deliver_soup1': 'aa_deliver_soup_use_pot1',
        # deliver the soup of the second pot to the serving location
        'deliver_soup2': 'aa_deliver_soup_use_pot2',
        # deliver the soup of any pot to the serving location
        'deliver_soup': 'deliver_soup',
        # first pot: both place onions and deliver
        'place_deliver_pot1': 'aa_place_onion_and_deliver_soup1',
        # second pot: both place onions and deliver
        'place_deliver_pot2': 'aa_place_onion_and_deliver_soup2',
        # any pot: both place onions and deliver
        'place_deliver': 'place_onion_and_deliver_soup',
    }
    # The whole library is built for slot 0 first, then for slot 1.
    agent_skills = {name: build_skill(key, 0) for name, key in skill_keys.items()}
    human_skills = {name: build_skill(key, 1) for name, key in skill_keys.items()}

    # Skills that actually drive this rollout. Other human skills noted by the
    # original author as alternatives: deliver_soup1, deliver_soup,
    # place_deliver_pot1, place_pot1.
    human_driver = human_skills['place_deliver']
    ai_driver = agent_skills['deliver_soup2']

    # Initialise the ReplayBuffer
    replay_buffer = ReplayBuffer(
        obs_shape=[2, 9*5*20],
        action_shape=[2, 1],
        reward_shape=[2, 1],
        dones_shape=[2, 1],
        capacity=400,
        device='cuda',
    )

    episode_rewards = 0
    step = 0
    obs, _, _ = env.reset()
    obs = np.stack(obs)
    while True:
        # The human slot is queried before the agent slot.
        human_move = human_driver.step(env.base_mdp, env.base_env.state, 1)
        human_action = Action.ALL_ACTIONS.index(human_move)

        ai_move = ai_driver.step(env.base_mdp, env.base_env.state, 0)
        ai_action = Action.ALL_ACTIONS.index(ai_move)

        now_obs = copy.deepcopy(obs)
        joint_action = [[ai_action], [human_action]]
        next_obs, _, rewards, dones, _, _ = env.step(joint_action)
        next_obs = np.stack(next_obs)
        last_reward = rewards[-1]

        # Store the transition; every field is reshaped to two rows.
        replay_buffer.add(
            now_obs.reshape(2, -1),
            np.array([ai_action, human_action]).reshape(2, -1),
            np.array([last_reward, last_reward]).reshape(2, -1),
            next_obs.reshape(2, -1),
            np.array(dones).reshape(2, -1),
        )

        episode_rewards += last_reward
        obs = copy.deepcopy(next_obs)
        if last_reward > 0:
            print('now:', step, last_reward)
        step += 1

        # Stop as soon as either slot reports the episode as done.
        if dones[0] == True or dones[1] == True:
            break

    print('episode_reward:', episode_rewards)
    save_name = str(r"unident_s") + '_' + str(test_num)
    replay_buffer.save(save_path, save_name)

if __name__ == '__main__':
    # Hard-coded alternative to the command-line arguments, kept for reference:
    #   env_name = r'distant_tomato'                         # experiment layout
    #   save_path = r"Diatant_Tomato_deliver_soup_use_pot2"  # storage path
    #   if not os.path.exists(save_path):
    #       os.makedirs(save_path)

    import sys

    # Expected invocation: <script> <env_name> <save_path>
    print('parameters:', sys.argv)
    env_name, save_path = sys.argv[1], sys.argv[2]

    # Create the output directory when it does not exist yet.
    output_dir_missing = not os.path.exists(save_path)
    if output_dir_missing:
        os.makedirs(save_path)

    get_script_script_Distant_Tomato(env_name, save_path, test_num=1)
 
    
    
    
    
    