#!/bin/bash

# Hyper-parameter sweep: invokes the launcher once for every combination of
# the value lists defined in run_sweep (with the current lists that is one
# launch per environment).
#
# Environment:
#   SWEEP_LAUNCHER  Command that receives the job arguments. Defaults to
#                   launch_job/launcher.sh (resolved against the current
#                   directory). Any command name works, e.g.
#                   SWEEP_LAUNCHER=echo prints the arguments as a dry run.
#
# `set -e` is deliberately not enabled: a failing launch does not stop the
# remaining launches.

# Extra arguments forwarded to every launch; split on whitespace into words.
WANDB_ARGUMENT="--wandb_project aSAC"
SWEEP_LAUNCHER="${SWEEP_LAUNCHER:-launch_job/launcher.sh}"

#######################################
# Runs the launcher for the cartesian product of all value lists below.
# Globals:   WANDB_ARGUMENT (read), SWEEP_LAUNCHER (read)
# Arguments: none
# Outputs:   whatever the launcher writes
# Returns:   exit status of the last launcher invocation
#######################################
run_sweep() {
  # Value lists, outermost loop first. The comment above a list keeps the
  # other candidate values that have been used for that option.

  # "Hopper" "Ant" "Walker2d" "Humanoid" "HalfCheetah" "HumanoidStandup"
  local -a envs=("Ant" "Walker2d" "Humanoid" "HalfCheetah" "HumanoidStandup")
  local -a n_samples_values=(500_000)
  # "0.005" "0.001" "0.0005" "0.005,0.001,0.0005"
  local -a learning_rates_values=("0.001")
  local -a n_initial_samples_values=(5000)
  local -a utd_values=(1)
  # "adam" "adamw" "rmsprop" "adam,adamw,rmsprop"
  local -a optimizer_classes_values=("adam,adamw,rmsprop")
  # "256,256_256,256_256_256,256_256_256_256" "256" "256_256" "256_256_256" "256_256_256_256"
  local -a net_archs_qf_values=("256_256")
  # "relu" "tanh" "sigmoid" "relu,tanh,sigmoid"
  local -a activation_fns_values=("relu")
  local -a m_critics_values=(2)
  # "" "--random_target_qf"   (bare flag; empty string means "flag absent")
  local -a random_target_qf_flags=("")
  local -a aggregate_target_qf_values=("min")
  # Bare flag; empty string means "flag absent".
  local -a all_policy_qf_flags=("")
  # 0.01 1
  local -a end_epsilon_values=(0.01)
  local -a aggregate_policy_qf_values=("min")

  # Split WANDB_ARGUMENT into separate words explicitly instead of relying on
  # an unquoted expansion (which would also glob-expand the words).
  local -a wandb_args=()
  read -r -a wandb_args <<<"$WANDB_ARGUMENT"

  local env_name n_samples learning_rates n_initial_samples utd
  local optimizer_classes net_archs_qf activation_fns m_critics
  local random_target_qf aggregate_target_qf all_policy_qf
  local end_epsilon duration_epsilon aggregate_policy_qf
  local -a args

  for env_name in "${envs[@]}"; do
    for n_samples in "${n_samples_values[@]}"; do
      for learning_rates in "${learning_rates_values[@]}"; do
        for n_initial_samples in "${n_initial_samples_values[@]}"; do
          for utd in "${utd_values[@]}"; do
            for optimizer_classes in "${optimizer_classes_values[@]}"; do
              for net_archs_qf in "${net_archs_qf_values[@]}"; do
                for activation_fns in "${activation_fns_values[@]}"; do
                  for m_critics in "${m_critics_values[@]}"; do
                    for random_target_qf in "${random_target_qf_flags[@]}"; do
                      for aggregate_target_qf in "${aggregate_target_qf_values[@]}"; do
                        for all_policy_qf in "${all_policy_qf_flags[@]}"; do
                          for end_epsilon in "${end_epsilon_values[@]}"; do
                            # The epsilon duration always follows the sample budget.
                            for duration_epsilon in "$n_samples"; do
                              for aggregate_policy_qf in "${aggregate_policy_qf_values[@]}"; do
                                # ${var:+"$var"} contributes no word at all when
                                # the optional flag is empty, so the launcher
                                # never receives an empty-string argument.
                                args=(
                                  --env "$env_name"
                                  "${wandb_args[@]}"
                                  --n_samples "$n_samples"
                                  --learning_rates "$learning_rates"
                                  --n_initial_samples "$n_initial_samples"
                                  --utd "$utd"
                                  --optimizer_classes "$optimizer_classes"
                                  --net_archs_qf "$net_archs_qf"
                                  --activation_fns "$activation_fns"
                                  --m_critics "$m_critics"
                                  ${random_target_qf:+"$random_target_qf"}
                                  --aggregate_target_qf "$aggregate_target_qf"
                                  ${all_policy_qf:+"$all_policy_qf"}
                                  --end_epsilon "$end_epsilon"
                                  --duration_epsilon "$duration_epsilon"
                                  --aggregate_policy_qf "$aggregate_policy_qf"
                                )
                                "$SWEEP_LAUNCHER" "${args[@]}"
                              done
                            done
                          done
                        done
                      done
                    done
                  done
                done
              done
            done
          done
        done
      done
    done
  done
}

run_sweep

# Examples:
# SAC --utd 1 --n_critics 2 --m_critics 2 --aggregate_target_qf min --end_epsilon 1 --duration_epsilon 0 --aggregate_policy_qf min
# REDQ --utd 20 --n_critics 10 --m_critics 2 --random_target_qf --aggregate_target_qf min --all_policy_qf --end_epsilon 1 --duration_epsilon 0 --aggregate_policy_qf mean

# Goal
# SAC --utd 1 --n_critics 6 --m_critics 2 --aggregate_target_qf min --end_epsilon 0.01 --duration_epsilon 0 --aggregate_policy_qf min
# REDQ --utd 20 --n_critics 10 --m_critics 2 --aggregate_target_qf min --end_epsilon 0.01 --duration_epsilon 0 --aggregate_policy_qf mean

