#!/usr/bin/env bash
# Launch script: runs ../train/train_football_trsyn.py with the "rmappotrsyn"
# algorithm on the Football scenario "academy_3_vs_1_with_keeper", using the
# hyper-parameters defined below.
#
# Usage: invoke from a directory in which ../train/train_football_trsyn.py
# resolves (the trainer path is relative to the current working directory).
#
# bash is required: the script uses arrays, which POSIX sh does not provide.

# Fail on any reference to an unset variable instead of silently handing an
# empty string to the trainer or to the W&B run name.
set -u

# exp param
env="Football"
scenario="academy_3_vs_1_with_keeper"
algo="rmappotrsyn" # "mappo" "ippo"
exp="check"
# Single source of truth for the seed: used for --seed and for the W&B run name.
seed=1

project="3v1_sigma_v0"
pres=( "IDV" "TEAM" "RS" "CHANGE" )
group="RMAPPO"

# football param
num_agents=3

# train param
num_env_steps=25000000
episode_length=200
n_rollout_threads=50
ppo_epoch=15
num_mini_batch=2
clip=0.05
idv_clip_ratio=0.5
idv_end_clip_ratio=0.2
idv_clip_episodes=50000
team_clip_ratio=0.2
team_end_clip_ratio=0.2
team_clip_episodes=50000
idv_kl_coef=0.
idv_kl_end_coef=0.005
idv_kl_episodes=50000
team_kl_coef=0.005
team_kl_end_coef=0
team_kl_episodes=50000

# The five settings below are NOT passed to the trainer by this script; they
# are only referenced by the commented-out SMAC command further down the file.
idv_clip_use_time=0
idv_kl_use_time=0
team_kl_use_time=0
alpha=0

files=${exp}

# Derived W&B identifiers. Only the first prefix in `pres` is used here.
wandb_group="${pres[0]}_${group}"
wandb_exp_name="${env}_${scenario}_${seed}"

# printf (not echo) so that \t is rendered as a tab regardless of the shell's
# echo implementation.
printf 'n_rollout_threads: %s \t ppo_epoch: %s \t num_mini_batch: %s\n' \
  "${n_rollout_threads}" "${ppo_epoch}" "${num_mini_batch}"

# Trainer arguments, kept in an array so every value is passed as exactly one
# word. Flag order matches the original command line.
train_args=(
  --env_name "${env}" --scenario_name "${scenario}"
  --algorithm_name "${algo}" --experiment_name "${exp}" --seed "${seed}"
  --num_agents "${num_agents}" --num_env_steps "${num_env_steps}"
  --episode_length "${episode_length}"
  --representation "simple115v2" --rewards "scoring,checkpoints"
  --n_rollout_threads "${n_rollout_threads}" --ppo_epoch "${ppo_epoch}"
  --num_mini_batch "${num_mini_batch}"
  --save_interval 200000 --log_interval 200000
  --use_eval --eval_interval 400000
  --n_eval_rollout_threads 100 --eval_episodes 100
  --use_value_active_masks
  --wandb_project "${project}" --user_name "804703098" --wandb_name "xxx"
  --wandb_group "${wandb_group}" --wandb_exp_name "${wandb_exp_name}"
  --idv_clip_ratio "${idv_clip_ratio}"
  --idv_end_clip_ratio "${idv_end_clip_ratio}"
  --idv_clip_episodes "${idv_clip_episodes}"
  --team_clip_ratio "${team_clip_ratio}"
  --team_end_clip_ratio "${team_end_clip_ratio}"
  --team_clip_episodes "${team_clip_episodes}"
  --idv_kl_coef "${idv_kl_coef}"
  --idv_kl_end_coef "${idv_kl_end_coef}"
  --idv_kl_episodes "${idv_kl_episodes}"
  --team_kl_coef "${team_kl_coef}"
  --team_kl_end_coef "${team_kl_end_coef}"
  --team_kl_episodes "${team_kl_episodes}"
  --idv_use_shared_obs --idv_use_kl_loss --team_use_kl_loss
  --clip_param "${clip}"
  --use_stacked_frames --stacked_frames 4
)

# CUDA_VISIBLE_DEVICES=0 is set for the trainer process only.
CUDA_VISIBLE_DEVICES=0 python ../train/train_football_trsyn.py "${train_args[@]}"

#CUDA_VISIBLE_DEVICES=${n} python train/train_smac_trsyn.py --init_alpha ${alpha} --env_name ${env} \
#--algorithm_name ${algos[0]} --experiment_name ${exp}_${pres[0]} --map_name ${map} --seed ${seed} --n_training_threads ${n_train}\
#--n_rollout_threads ${n_rollout} --n_eval_rollout_threads ${n_eval} --num_mini_batch ${num_mini_batch}\
#--episode_length ${episode_length} --num_env_steps ${num_env_steps} --ppo_epoch ${ppo_epoch} \
#--use_value_active_masks --use_eval --wandb_project ${project} --user_name ${user}\
#--wandb_group ${pres[0]}_${group} --wandb_exp_name ${env}_${map}_${seed} --idv_clip_ratio ${idv_clip_ratio}\
#--idv_end_clip_ratio ${idv_end_clip_ratio} --idv_clip_episodes ${idv_clip_episodes} \
#--team_clip_ratio ${team_clip_ratio} --team_end_clip_ratio ${team_end_clip_ratio}\
#--team_clip_episodes ${team_clip_episodes} --idv_kl_coef ${idv_kl_coef} --idv_kl_end_coef ${idv_kl_end_coef}\
#--idv_kl_episodes ${idv_kl_episodes} --team_kl_coef ${team_kl_coef} --team_kl_end_coef ${team_kl_end_coef}\
#--team_kl_episodes ${team_kl_episodes} --idv_use_shared_obs --idv_use_kl_loss --team_use_kl_loss --clip_param ${clip}\
#--use_recurrent_policy --use_stacked_frames --stacked_frames 4 --idv_clip_flag ${idv_clip_flag} \
#--idv_clip_flag_refine ${idv_clip_flag_refine} --idv_clip_use_time ${idv_clip_use_time} --idv_kl_use_time ${idv_kl_use_time}\
#--team_kl_use_time ${team_kl_use_time} >& out_logs/${files}/${algos[0]}_${pres[0]}_${env}_${map}_${seed}.txt &
