#!/bin/bash
#
# Launches local runs over a grid of hyperparameters by calling
# launch_job/local_launcher.sh once per combination.
#
# All paths are relative, so run this script from the directory that contains
# launch_job/. The command-line arguments of this script are forwarded
# unchanged to parse_arguments (defined in launch_job/parse_arguments.sh).
# NOTE(review): parse_arguments presumably sets FIRST_SEED and LAST_SEED, which
# are read further down but never assigned in this file -- confirm.

# Flag/value pair handed to the launcher. Kept as one flat string: it is
# deliberately word-split where it is used.
WANDB_ARGUMENT="--wandb_project aSAC"

# Fail early with a clear message instead of continuing without the parser.
if [[ ! -r launch_job/parse_arguments.sh ]]; then
    printf 'error: cannot read launch_job/parse_arguments.sh (run from the repository root)\n' >&2
    exit 1
fi
# shellcheck source=/dev/null
source launch_job/parse_arguments.sh

# "$@" keeps every argument as one word; a bare $@ would re-split arguments on
# whitespace and expand glob characters before the parser sees them.
parse_arguments "$@"

# The launcher always receives "-fs <seed> -ls <seed>". Without a value the
# following flag would silently be consumed as the seed, so stop right away.
: "${FIRST_SEED:?FIRST_SEED must be set before launching (expected from parse_arguments)}"
: "${LAST_SEED:?LAST_SEED must be set before launching (expected from parse_arguments)}"

#######################################
# Launch one local run for the hyperparameter combination currently held in
# the sweep variables assigned by the loops below.
# Globals (read): ENV, WANDB_ARGUMENT, FIRST_SEED, LAST_SEED, N_SAMPLES,
#   LEARNING_RATES, N_INITIAL_SAMPLES, UTD, OPTIMIZER_CLASSES, NET_ARCHS_QF,
#   ACTIVATION_FNS, M_CRITICS, RANDOM_TARGET_QF, AGGREGATE_TARGET_QF,
#   ALL_POLICY_QF, END_EPSILON, DURATION_EPSILON, AGGREGATE_POLICY_QF
# Arguments: none
# Returns: the exit status of launch_job/local_launcher.sh
#######################################
launch_run() {
    local -a args
    # WANDB_ARGUMENT is a flat "flag value" string, so it is split on
    # whitespace on purpose; every other expansion is one quoted word.
    # RANDOM_TARGET_QF and ALL_POLICY_QF are optional switches: the
    # ${var:+"$var"} form expands to nothing when the switch is empty and to
    # exactly one word otherwise.
    # shellcheck disable=SC2206
    args=(
        --env "$ENV"
        $WANDB_ARGUMENT
        -fs "$FIRST_SEED"
        -ls "$LAST_SEED"
        --n_samples "$N_SAMPLES"
        --learning_rates "$LEARNING_RATES"
        --n_initial_samples "$N_INITIAL_SAMPLES"
        --utd "$UTD"
        --optimizer_classes "$OPTIMIZER_CLASSES"
        --net_archs_qf "$NET_ARCHS_QF"
        --activation_fns "$ACTIVATION_FNS"
        --m_critics "$M_CRITICS"
        ${RANDOM_TARGET_QF:+"$RANDOM_TARGET_QF"}
        --aggregate_target_qf "$AGGREGATE_TARGET_QF"
        ${ALL_POLICY_QF:+"$ALL_POLICY_QF"}
        --end_epsilon "$END_EPSILON"
        --duration_epsilon "$DURATION_EPSILON"
        --aggregate_policy_qf "$AGGREGATE_POLICY_QF"
    )
    launch_job/local_launcher.sh "${args[@]}"
}

# Grid sweep: one launcher call per combination of the values listed in the
# loops. The comment above a loop lists other values that have been used.
# A failing launch does not stop the remaining combinations.

# "Hopper" "Ant" "Walker2d" "Humanoid" "HalfCheetah" "HumanoidStandup"
for ENV in "Hopper"; do
    for N_SAMPLES in 500_000; do
        # "0.005" "0.001" "0.0005" "0.005,0.001,0.0005"
        for LEARNING_RATES in "0.001"; do
            for N_INITIAL_SAMPLES in 5000; do
                for UTD in 1; do
                    # "adam" "adagrad" "rmsprop"
                    for OPTIMIZER_CLASSES in "adam"; do
                        # "256,256_256,256_256_256,256_256_256_256" "256" "256_256" "256_256_256" "256_256_256_256"
                        for NET_ARCHS_QF in "256,256_256,256_256_256,256_256_256_256"; do
                            # "relu" "tanh" "sigmoid"
                            for ACTIVATION_FNS in "relu"; do
                                for M_CRITICS in 2; do
                                    # "" "--random_target_qf"
                                    for RANDOM_TARGET_QF in "--random_target_qf"; do
                                        for AGGREGATE_TARGET_QF in "min"; do
                                            for ALL_POLICY_QF in ""; do
                                                # 0.01 1
                                                for END_EPSILON in 1; do
                                                    # The epsilon schedule spans the whole sample budget.
                                                    for DURATION_EPSILON in "$N_SAMPLES"; do
                                                        for AGGREGATE_POLICY_QF in "min"; do
                                                            launch_run
                                                        done
                                                    done
                                                done
                                            done
                                        done
                                    done
                                done
                            done
                        done
                    done
                done
            done
        done
    done
done

# Examples:
# SAC --utd 1 --n_critics 2 --m_critics 2 --aggregate_target_qf min --end_epsilon 1 --duration_epsilon 0 --aggregate_policy_qf min
# REDQ --utd 20 --n_critics 10 --m_critics 2 --random_target_qf --aggregate_target_qf min --all_policy_qf --end_epsilon 1 --duration_epsilon 0 --aggregate_policy_qf mean

# Goal:
# SAC --utd 1 --n_critics 6 --m_critics 2 --aggregate_target_qf min --end_epsilon 0.01 --duration_epsilon 0 --aggregate_policy_qf min
# REDQ --utd 20 --n_critics 10 --m_critics 2 --aggregate_target_qf min --end_epsilon 0.01 --duration_epsilon 0 --aggregate_policy_qf mean

