#!/usr/bin/env bash
#
# Configuration and log-directory setup for the training launch loop that
# follows. This script relies on bash-only features (indexed arrays,
# `declare -a`, `>&` redirection), so it must be run by bash rather than a
# strict POSIX sh.

# --- Environment / scenario settings (forwarded as CLI flags by the loop) ---
env="MPE"
scenario_name="simple_spread_ctr"
num_good_agents=4
num_agents=4
num_adv=2
num_landmarks=2
game_mode="easy"


# --- Algorithm variants -----------------------------------------------------
# base[i], algos[i] and pres[i] are parallel arrays: entry i of each describes
# the same variant (algorithm name, experiment suffix, wandb group prefix).
# NOTE(review): the launch loop currently only uses indices 0 and 2.
base=( "rmappotrsynrnd" "rmappotrsynrnd" "rmappotrsynrnd" "rmappotrsynrnd" )
# base=( "rmappotrsynrnd" "rmappotrsynrnd" )
exp="exp_train_continue_spread_ctr_base_v0"
algos=( "rnd" "rnd_adv_surgery" "rnd_gdt_surgery" "rnd_adv_gdt_surgery" )

# --- Training hyper-parameters (forwarded as CLI flags by the loop) ---------
n_train=1
n_rollout=8
n_eval=8
mini_batch=1
episode_length=25
steps=1000000
ppo_epoch=10
gain=0.01
lr=7e-4
critic_lr=7e-4
entropy_coef=0.2
entropy_end_coef=0.05
entropy_change_episode=9000
# NOTE(review): change_reward_episode is defined here but is not passed to any
# command in this script -- confirm whether a flag is missing or it is unused.
change_reward_episode=5000

# --- Weights & Biases bookkeeping -------------------------------------------
user="804703098"
project="Continue_Spread_Ctr_Base_v2"
pres=( "RND" "RND_ADV_SURGERY" "RND_GDT_SURGERY" "RND_ADV_GDT_SURGERY" )
group="RMAPPO"

# One pair of runs is launched per seed.
declare -a seeds=( "1" "22" "333" "4444" "55555" "12345" "67890" )

# n is the value exported as CUDA_VISIBLE_DEVICES for the next job; the loop
# advances it modulo gpunum after every launch. Setting n=-1 disables the
# rotation (the loop leaves -1 untouched).
n=0
gpunum=8

# Per-experiment log directory. -p creates the parent "out_logs" directory when
# it is missing and is silent when the directory already exists, so genuine
# failures (permissions, read-only FS, ...) are no longer hidden.
files=${exp}
mkdir -p "out_logs/${files}"

#######################################
# Start one background training run and advance the GPU rotation.
# Globals:
#   Read: env exp files base algos pres group scenario_name num_good_agents
#         num_agents num_adv num_landmarks game_mode n_train n_rollout n_eval
#         mini_batch episode_length steps ppo_epoch gain lr critic_lr
#         entropy_coef entropy_end_coef entropy_change_episode project user
#         gpunum
#   Read and written: n (value exported as CUDA_VISIBLE_DEVICES)
# Arguments:
#   $1  - seed for this run
#   $2  - index into the parallel base/algos/pres arrays
#   $3+ - optional extra flags, inserted just before --collaborative
# Outputs:
#   Writes "<algo>_<env>_<seed> start" to stdout; the job's stdout and stderr
#   go to out_logs/<files>/<base>_<algo>_<scenario>_<seed>.txt
#######################################
launch_run() {
    local run_seed=$1
    local idx=$2
    shift 2

    local log_file="out_logs/${files}/${base[idx]}_${algos[idx]}_${scenario_name}_${run_seed}.txt"

    # Build argv as an array so each value stays a single word.
    local -a cmd=(
        python train/train_mpe_trsyn_rnd.py
        --std_seperated --discrete_action --use_eval --use_valuenorm
        --use_popart --use_ReLU
        --env_name "${env}"
        --algorithm_name "${base[idx]}"
        --experiment_name "${exp}_${algos[idx]}"
        --scenario_name "${scenario_name}"
        --num_good_agents "${num_good_agents}"
        --num_agents "${num_agents}"
        --num_adversaries "${num_adv}"
        --num_landmarks "${num_landmarks}"
        --game_mode "${game_mode}"
        --seed "${run_seed}"
        --n_training_threads "${n_train}"
        --n_rollout_threads "${n_rollout}"
        --n_eval_rollout_threads "${n_eval}"
        --num_mini_batch "${mini_batch}"
        --episode_length "${episode_length}"
        --num_env_steps "${steps}"
        --ppo_epoch "${ppo_epoch}"
        --gain "${gain}"
        --lr "${lr}"
        --critic_lr "${critic_lr}"
        --entropy_coef "${entropy_coef}"
        --entropy_end_coef "${entropy_end_coef}"
        --entropy_change_episode "${entropy_change_episode}"
        --wandb_project "${project}"
        --user_name "${user}"
        --wandb_group "${pres[idx]}_${group}"
        --wandb_exp_name "${env}_${run_seed}"
        "$@"
        --collaborative --scenario_has_diff_rewards
    )

    # Detach the job; both stdout and stderr are captured in the log file.
    CUDA_VISIBLE_DEVICES="${n}" nohup "${cmd[@]}" >"${log_file}" 2>&1 &

    # Round-robin over the GPUs; n == -1 is left untouched.
    if [[ "${n}" != "-1" ]]; then
        n=$(( (n + 1) % gpunum ))
    fi
    echo "${algos[idx]}_${env}_${run_seed} start"
}

echo "env is ${env}, exp is ${exp}"
for seed in "${seeds[@]}"
do
    echo "seed is ${seed}:"

    # Variant 0: no extra flags.
    launch_run "${seed}" 0

    # Variant 2: same command plus --gradient_use_surgery.
    launch_run "${seed}" 2 --gradient_use_surgery

    # Pause between seeds before launching the next pair of jobs.
    sleep 10

    # Disabled: block until no python process is left before the next seed.
    # COUNT=$(ps -ef |grep python |grep -v "grep" |wc -l)
    # while [ ${COUNT} -gt 0 ];
    # do
    # sleep 600
    # echo ${COUNT}
    # COUNT=$(ps -ef |grep python |grep -v "grep" |wc -l)
    # done
done
