import itertools
import os
import subprocess
import time

# Run this:
    # echo "seed is ${seed}:"
    # CUDA_VISIBLE_DEVICES=0 python ../train/train_bridge.py  --env_name ${env} --algorithm_name ${algo} --experiment_name ${exp} --map_type ${map_type} --num_agents ${num_agents} \
    #  --seed ${seed} --n_timesteps 10 --n_training_threads 1 --n_rollout_threads 50 --num_mini_batch 1 --episode_length 30 --num_env_steps 1000000 --reward_shaping_horizon 100000000 \
    #  --ppo_epoch 15 --use_latent_actions \
    #  --save_interval 25 --log_interval 10 --use_recurrent_policy --bc_loss_coef 0.5 \
    #  --wandb_name "username" --user_name "username" 

# env = "Bridge_dif_entropy3"
algo = "diff-mappo"


# my_env = os.environ.copy()
# my_env["CUDA_VISIBLE_DEVICES"] = "0"
num_gpu = 1
num_max_process = 1
envs = [os.environ.copy() for _ in range(num_max_process)]
process = [None for _ in range(num_max_process)]
for i in range(num_max_process):
    envs[i]["CUDA_VISIBLE_DEVICES"] = str((i)%num_gpu + 1)

project = 'Football-dif_rnew24-mini'
que = []

for joint in [True]:
    for seed in [3]:
        for use_recurrent_policy in [False]:
            for entropy_coef in [2e-3]:
                for repeat_num in [1]:
                    for clone_episodes in [8]:
                        for n_timesteps in [5]:
                            for scenario_name, rnum_agents, episode_length in [('academy_3_vs_1_with_keeper', 3, 200)]:
                            # for scenario_name, rnum_agents, episode_length in [('academy_3_vs_1_with_keeper', 3, 200), ('academy_counterattack_hard', 4, 500), ('academy_corner', 10, 500)]: 
                            # for scenario_name, rnum_agents, episode_length in [('academy_3_vs_1_with_keeper', 3, 200), ('academy_counterattack_easy', 4, 200), ('academy_counterattack_hard', 4, 500), ('academy_corner', 10, 500), ('academy_run_pass_and_shoot_with_keeper', 2, 200)]:
                            # for scenario_name, rnum_agents in [('academy_3_vs_1_with_keeper', 3), ('academy_counterattack_easy', 4), ('academy_counterattack_hard', 4), ('academy_corner', 10), ('academy_run_pass_and_shoot_with_keeper', 2)]:
                            # for scenario_name, rnum_agents in [('academy_3_vs_1_with_keeper', 3)]:
                                for lr in [1e-3]:
                                    for clone_weight_decay in [0]:
                                        for unet_hidden_size in [1024]:
                                            for hidden_size in [512]:
                                                for bc_loss_coef in [1]:
                                                    for ppo_epoch in [20]:
                                                        for clip_param in [0.2]:
                                                            for negative_sample_scale in [1]:
                                                                for logit_scaling in [20]:
                                                                    for bc_epoch in [0]:
                                                                            for eta in [-1, 0.0, 0.03, 0.1, 0.3]:
                                                                            # for eta in [-1]:
                                                                                    # vault_uid = "corner_mixed_1"
                                                                                    vault_uid = f"{scenario_name}_mixed"
                                                                                    cmd = f"python ../train/train_football.py --unet_num_layer 7 --normalize_advantage --env_name {project} --algorithm_name {algo} --experiment_name map_{scenario_name} --scenario_name {scenario_name} --num_agents {1} --rnum_agents {rnum_agents} \
                                                                            --representation 'simple115v2' --beta_schedule 'linear' --seed {seed} --n_timesteps {n_timesteps} --vault_uid {vault_uid}  --logit_scaling {logit_scaling} --clone_weight_decay {clone_weight_decay} --n_training_threads 16 --n_rollout_threads {50} --num_mini_batch 4 --episode_length {episode_length} --num_env_steps {2000000} --reward_shaping_horizon {100000000} --eta {eta} \
                                                                            --ppo_epoch {ppo_epoch} --bc_epoch {bc_epoch} --lr {lr} --bc_buffer_limit {0}  --hidden_size {hidden_size} --unet_hidden_size {unet_hidden_size} --repeat_num {repeat_num} --clone_episodes {clone_episodes} --use_latent_actions --rewards 'scoring,checkpoints' \
                                                                            --save_interval 50 --log_interval {1} --bc_loss_coef {bc_loss_coef} --clip_param {clip_param} --use_attention \
                                                                            --reward_shaping_factor 1 --single_agent --initial_reward_shaping_factor 1\
                                                                            --save_interval 20 --negative_sample_scale {negative_sample_scale} --eval_interval 10  --n_eval_rollout_threads 50 --eval_episodes 100 --use_eval  --entropy_coef {entropy_coef} \
                                                                            --wandb_name 'username' --user_name 'username' " + (' --joint_train ' if joint else '') + ('--use_recurrent_policy' if not use_recurrent_policy else '') + (' --no_rand_train ' if False else '') + ' --someinfo obs_0_and_x_rand_noise_rand'
                                                                                    que.append(cmd)
# Drain the queue: every command is started in the first worker slot that is
# either unused or whose previous process has already exited.
for cmd in que:
    launched = False
    while not launched:
        for slot, running in enumerate(process):
            if running is not None and running.poll() is None:
                # This slot still has a live process; look at the next one.
                continue
            # The slot index is appended as --id and selects the matching
            # environment copy (and therefore its CUDA_VISIBLE_DEVICES).
            process[slot] = subprocess.Popen(f"{cmd} --id {slot}", env=envs[slot], shell=True)
            launched = True
            # Pause after each launch before handling the next command.
            time.sleep(200)
            break
        else:
            # Every slot is busy: wait a little, then scan the slots again.
            time.sleep(20)

# kill $(ps -ef | grep 'diff-mappo' | awk '{print $2}')
# kill $(ps -ef | grep 'python' | awk '{print $2}')