#!/usr/bin/env bash
# Launcher for Google Research Football training runs
# (train/train_football_trsyn_CMT.py). This header section only defines the
# experiment settings and prepares the log directory; the loop further down
# starts one background run per seed.
#
# NOTE: the script relies on bash features (arrays, `declare -a`), so it must
# run under bash rather than a POSIX /bin/sh.

# exp param
env="Football"
scenario="academy_3_vs_1_with_keeper"
algo="rmappotrsyn" # alternatives noted by the original author: "mappo" "ippo"
exp="check"        # experiment name; also used as the log sub-directory name

# wandb / naming
project="3v1_sigma_v0"
pres=( "CMT_s2r2_v0" ) # prefix tag; element 0 is used in the log file name
group="CMT"

# football param
num_agents=3

# train param
num_env_steps=25000000
episode_length=200
clip=0.05
idv_clip_ratio=0.5
idv_end_clip_ratio=0.2
idv_clip_episodes=50000
team_clip_ratio=0.2
team_end_clip_ratio=0.2
team_clip_episodes=50000
idv_kl_coef=0.
idv_kl_end_coef=0.005
idv_kl_episodes=50000
team_kl_coef=0.005
team_kl_end_coef=0
team_kl_episodes=50000

# Not passed to the active training command; only the commented-out command
# at the end of this file references these three.
idv_clip_use_time=0
idv_kl_use_time=0
team_kl_use_time=0

# Value passed as --init_alpha.
alpha=1

# GPU scheduling: n is the index given to CUDA_VISIBLE_DEVICES for the first
# run; the launch loop advances it modulo gpunum unless n is -1.
n=2
gpunum=8

# Log directory: out_logs/<exp>. -p creates the missing parent and makes an
# already-existing directory a non-error, so any failure left is a real one.
files=${exp}
if ! mkdir -p "out_logs/${files}"; then
    echo "cannot create log directory out_logs/${files}" >&2
    exit 1
fi

# declare -a seeds=( "1" "2" "3" "4" "5" "6" "7" "8")
declare -a seeds=( "1" )

# Start one background training process per seed. Each run is pinned to GPU
# index ${n} through CUDA_VISIBLE_DEVICES, and its stdout+stderr are written
# to out_logs/${files}/<run_name>.txt.
for seed in "${seeds[@]}"; do
    # Tag shared by the log file name and the progress message. Built from
    # ${algo} and ${scenario}: the previously used ${algos[0]} and ${map}
    # are never defined in this script and expanded to empty strings.
    run_name="${algo}_${pres[0]}_${env}_${scenario}_${seed}"

    # Arguments are kept in an array so every value stays a single word.
    # --seed appears exactly once: a second hard-coded "--seed 1" used to
    # follow it, which would override the per-seed value if the Python
    # script's parser lets the last occurrence win (script not visible here).
    train_args=(
        --CMT
        --init_alpha "${alpha}"
        --seed "${seed}"
        --env_name "${env}"
        --scenario_name "${scenario}"
        --algorithm_name "${algo}"
        --experiment_name "${exp}"
        --num_agents "${num_agents}"
        --num_env_steps "${num_env_steps}"
        --episode_length "${episode_length}"
        --representation "simple115v2"
        --rewards "scoring,checkpoints"
        --n_rollout_threads 50
        --ppo_epoch 15
        --num_mini_batch 2
        --save_interval 200000
        --log_interval 200000
        --use_eval
        --eval_interval 400000
        --n_eval_rollout_threads 100
        --eval_episodes 100
        --use_value_active_masks
        --wandb_project "${project}"
        --user_name "804703098"
        --wandb_name "xxx"
        --wandb_group "${group}"
        --wandb_exp_name "${env}_${scenario}_${seed}"
        --idv_clip_ratio "${idv_clip_ratio}"
        --idv_end_clip_ratio "${idv_end_clip_ratio}"
        --idv_clip_episodes "${idv_clip_episodes}"
        --team_clip_ratio "${team_clip_ratio}"
        --team_end_clip_ratio "${team_end_clip_ratio}"
        --team_clip_episodes "${team_clip_episodes}"
        --idv_kl_coef "${idv_kl_coef}"
        --idv_kl_end_coef "${idv_kl_end_coef}"
        --idv_kl_episodes "${idv_kl_episodes}"
        --team_kl_coef "${team_kl_coef}"
        --team_kl_end_coef "${team_kl_end_coef}"
        --team_kl_episodes "${team_kl_episodes}"
        --idv_use_shared_obs
        --idv_use_kl_loss
        --team_use_kl_loss
        --clip_param "${clip}"
        --use_stacked_frames
        --stacked_frames 4
    )

    CUDA_VISIBLE_DEVICES="${n}" python train/train_football_trsyn_CMT.py \
        "${train_args[@]}" \
        > "out_logs/${files}/${run_name}.txt" 2>&1 &

    # Round-robin over GPU indices 0..gpunum-1 for the next seed. n == -1 is
    # left untouched (presumably "no GPU" -- confirm against intended usage).
    if [[ "${n}" != -1 ]]; then
        n=$(( (n + 1) % gpunum ))
    fi

    printf '%s start\n' "${run_name}"
    # Stagger launches by a few seconds before starting the next run.
    sleep 5
done

#CUDA_VISIBLE_DEVICES=${n} python train/train_smac_trsyn.py --init_alpha ${alpha} --env_name ${env} \
#--algorithm_name ${algos[0]} --experiment_name ${exp}_${pres[0]} --map_name ${map} --seed ${seed} --n_training_threads ${n_train}\
#--n_rollout_threads ${n_rollout} --n_eval_rollout_threads ${n_eval} --num_mini_batch ${num_mini_batch}\
#--episode_length ${episode_length} --num_env_steps ${num_env_steps} --ppo_epoch ${ppo_epoch} \
#--use_value_active_masks --use_eval --wandb_project ${project} --user_name ${user}\
#--wandb_group ${pres[0]}_${group} --wandb_exp_name ${env}_${map}_${seed} --idv_clip_ratio ${idv_clip_ratio}\
#--idv_end_clip_ratio ${idv_end_clip_ratio} --idv_clip_episodes ${idv_clip_episodes} \
#--team_clip_ratio ${team_clip_ratio} --team_end_clip_ratio ${team_end_clip_ratio}\
#--team_clip_episodes ${team_clip_episodes} --idv_kl_coef ${idv_kl_coef} --idv_kl_end_coef ${idv_kl_end_coef}\
#--idv_kl_episodes ${idv_kl_episodes} --team_kl_coef ${team_kl_coef} --team_kl_end_coef ${team_kl_end_coef}\
#--team_kl_episodes ${team_kl_episodes} --idv_use_shared_obs --idv_use_kl_loss --team_use_kl_loss --clip_param ${clip}\
#--use_recurrent_policy --use_stacked_frames --stacked_frames 4 --idv_clip_flag ${idv_clip_flag} \
#--idv_clip_flag_refine ${idv_clip_flag_refine} --idv_clip_use_time ${idv_clip_use_time} --idv_kl_use_time ${idv_kl_use_time}\
#--team_kl_use_time ${team_kl_use_time} >& out_logs/${files}/${algos[0]}_${pres[0]}_${env}_${map}_${seed}.txt &
