#!/bin/bash

# Launch three train_sac runs in parallel (one per seed) and wait for all of
# them to finish.
#
# Usage: invoked with the command-line arguments understood by
# parse_arguments (defined in launch_job/parse_arguments.sh), which is
# expected to set the configuration variables used below (ENV, N_SAMPLES,
# LEARNING_RATES, ...).
#
# Exit status: 0 when every run succeeds; otherwise the exit status of the
# last run (in launch order) that failed.

source launch_job/parse_arguments.sh

# Abort early instead of launching runs with an empty configuration when the
# helper could not be loaded.
if ! command -v parse_arguments > /dev/null 2>&1; then
    echo "error: parse_arguments is not available (could not load launch_job/parse_arguments.sh)" >&2
    exit 1
fi

# "$@" is quoted so that arguments containing whitespace or glob characters
# reach the parser unchanged.
parse_arguments "$@"

pids=()
for i in {1..3}
do
    # Array task k gets seeds 3(k-1)+1 .. 3k, so seeds do not overlap across
    # tasks. NOTE(review): if SLURM_ARRAY_TASK_ID is unset it evaluates to 0
    # in arithmetic context, which yields seeds -2, -1 and 0.
    seed=$((3 * (SLURM_ARRAY_TASK_ID - 1) + i))

    # Word splitting is intentional below: comma-separated lists are turned
    # into space-separated words, and the bare variables ($WANDB_ARGUMENT,
    # $RANDOM_TARGET_QF, $ALL_POLICY_QF) are passed without an option name and
    # must vanish entirely when empty.
    # shellcheck disable=SC2086
    train_sac \
        --env $ENV \
        $WANDB_ARGUMENT \
        --seed $seed \
        --n_samples $N_SAMPLES \
        --learning_rates ${LEARNING_RATES//,/ } \
        --n_initial_samples $N_INITIAL_SAMPLES \
        --utd $UTD \
        --optimizer_classes ${OPTIMIZER_CLASSES//,/ } \
        --net_archs_qf ${NET_ARCHS_QF//,/ } \
        --activation_fns ${ACTIVATION_FNS//,/ } \
        --m_critics $M_CRITICS \
        $RANDOM_TARGET_QF \
        --aggregate_target_qf $AGGREGATE_TARGET_QF \
        $ALL_POLICY_QF \
        --end_epsilon $END_EPSILON \
        --duration_epsilon $DURATION_EPSILON \
        --aggregate_policy_qf $AGGREGATE_POLICY_QF &
    pids+=("$!")
done

# A bare `wait` always returns 0; wait on each PID so that a failed run is
# reflected in this script's exit status.
status=0
for pid in "${pids[@]}"
do
    wait "$pid" || status=$?
done

exit "$status"