Environments:

CartPole-v1
Acrobot-v1
LunarLander-v2

Pendulum-v1
MountainCarContinuous-v0

MiniGrid-Empty-Random-6x6-v0
MiniGrid-DoorKey-5x5-v0
MiniGrid-LavaGapS5-v0
MiniGrid-LavaGapS7-v0
MiniGrid-DistShift1-v0

MiniGrid-DistShift4-v0
________________________________________________________________________________________________________________

nohup python a2c_gymnax.py --env_id CartPole-v1 --track --n_estimators 1 --gpu_number 1 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C  --use_best_config --dynamic_buffer &
nohup python a2c_gymnax.py --env_id Acrobot-v1 --track --n_estimators 1 --gpu_number 1 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C  --use_best_config --dynamic_buffer &
nohup python a2c.py --env_id LunarLander-v2 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C --use_best_config --dynamic_buffer &

nohup python a2c_gymnax.py --env_id Pendulum-v1 --track --n_estimators 1 --gpu_number 2 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C  --use_best_config --action_type continuous --dynamic_buffer &
nohup python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --n_estimators 1 --gpu_number 1 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C  --use_best_config --action_type continuous --dynamic_buffer &


nohup python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C --use_best_config --dynamic_buffer &
nohup python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C --use_best_config --dynamic_buffer &
nohup python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C --use_best_config --dynamic_buffer &
nohup python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C --use_best_config --dynamic_buffer &
nohup python a2c.py --env_id MiniGrid-DistShift1-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C --use_best_config --dynamic_buffer &

nohup python a2c.py --env_id MiniGrid-DistShift4-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_useBest --exp_name results_useBest_A2C --use_best_config --dynamic_buffer &
________________________________________________________________________________________________________________

nohup python a2c_gymnax.py --env_id CartPole-v1 --track --gpu_number 1 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c_gymnax.py --env_id Acrobot-v1 --track --gpu_number 1 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id LunarLander-v2 --track --gpu_number 0 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c_gymnax.py --env_id Pendulum-v1 --track --gpu_number 2 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &
nohup python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --gpu_number 3 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &

nohup python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --gpu_number 0 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --gpu_number 0 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --gpu_number 0 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --gpu_number 0 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DistShift1-v0 --track --gpu_number 0 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c.py --env_id MiniGrid-DistShift4-v0 --track --gpu_number 0 --random_trials 5 --actor sdt --run_name SDT_useBest --exp_name results_useBest_A2C --use_best_config &

________________________________________________________________________________________________________________

nohup python a2c_gymnax.py --env_id CartPole-v1 --track --gpu_number 1 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c_gymnax.py --env_id Acrobot-v1 --track --gpu_number 1 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id LunarLander-v2 --track --gpu_number 1 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c_gymnax.py --env_id Pendulum-v1 --track --gpu_number 1 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &
nohup python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --gpu_number 1 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &

nohup python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --gpu_number 0 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --gpu_number 0 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --gpu_number 0 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --gpu_number 0 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DistShift1-v0 --track --gpu_number 0 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c.py --env_id MiniGrid-DistShift4-v0 --track --gpu_number 0 --random_trials 5 --actor d-sdt --run_name D-SDT_useBest --exp_name results_useBest_A2C --use_best_config &

________________________________________________________________________________________________________________

nohup python a2c_gymnax.py --env_id CartPole-v1 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C  --n_envs 16 --use_best_config &
nohup python a2c_gymnax.py --env_id Acrobot-v1 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C  --n_envs 16 --use_best_config &
nohup python a2c.py --env_id LunarLander-v2 --track --gpu_number 1 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C  --n_envs 16 --use_best_config &

nohup python a2c_gymnax.py --env_id Pendulum-v1 --track --gpu_number 2 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C  --n_envs 16 --use_best_config --action_type continuous &
nohup python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --gpu_number 3 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C  --n_envs 16 --use_best_config --action_type continuous &

nohup python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DistShift1-v0 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c.py --env_id MiniGrid-DistShift4-v0 --track --gpu_number 0 --random_trials 5 --actor mlp --run_name MLP_useBest --exp_name results_useBest_A2C --use_best_config &


________________________________________________________________________________________________________________

nohup python a2c_gymnax.py --env_id CartPole-v1 --track --gpu_number 1 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c_gymnax.py --env_id Acrobot-v1 --track --gpu_number 1 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id LunarLander-v2 --track --gpu_number 1 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c_gymnax.py --env_id Pendulum-v1 --track --gpu_number 2 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &
nohup python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --gpu_number 2 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &


nohup python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DistShift1-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c.py --env_id MiniGrid-DistShift4-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 5 --run_name SA-DT_5_useBest --exp_name results_useBest_A2C --use_best_config &


________________________________________________________________________________________________________________

nohup python a2c_gymnax.py --env_id CartPole-v1 --track --gpu_number 2 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c_gymnax.py --env_id Acrobot-v1 --track --gpu_number 2 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id LunarLander-v2 --track --gpu_number 2 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c_gymnax.py --env_id Pendulum-v1 --track --gpu_number 2 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &
nohup python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --gpu_number 2 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config --action_type continuous &


nohup python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &
nohup python a2c.py --env_id MiniGrid-DistShift1-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &

nohup python a2c.py --env_id MiniGrid-DistShift4-v0 --track --gpu_number 0 --random_trials 5 --actor stateActionDT --depth 8 --run_name SA-DT_8_useBest --exp_name results_useBest_A2C --use_best_config &


_________________________________________________________________________________________________________________________________________________
________________________________________________________       HPO CODE      ____________________________________________________________________
_________________________________________________________________________________________________________________________________________________

________________________________________________________       SYMPOL      ____________________________________________________________________
./run_ppo.sh 4 python a2c_gymnax.py --env_id CartPole-v1 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_HPO_A2C --optimize_config --dynamic_buffer;
./run_ppo.sh 4 python a2c_gymnax.py --env_id Acrobot-v1 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_HPO_A2C --optimize_config --dynamic_buffer;
./run_ppo.sh 6 python a2c.py --env_id LunarLander-v2 --track --n_estimators 1 --gpu_number 1 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_HPO_A2C --optimize_config --dynamic_buffer;

./run_ppo.sh 6 python a2c_gymnax.py --env_id Pendulum-v1 --track --n_estimators 1 --gpu_number 2 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_HPO_A2C  --optimize_config --action_type continuous --dynamic_buffer;
./run_ppo.sh 6 python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --n_estimators 1 --gpu_number 3 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_HPO_A2C --optimize_config --action_type continuous --dynamic_buffer;

./run_ppo.sh 3 python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_useBest_A2C --optimize_config --dynamic_buffer;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_useBest_A2C --optimize_config --dynamic_buffer;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_useBest_A2C --optimize_config --dynamic_buffer;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_useBest_A2C --optimize_config --dynamic_buffer;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-DistShift1-v0 --track --n_estimators 1 --gpu_number 0 --random_trials 5 --n_trials 60 --actor sympol --run_name SYMPOL_HPO --exp_name results_useBest_A2C --optimize_config --dynamic_buffer;



________________________________________________________       SDT      ____________________________________________________________________


./run_ppo.sh 5 python a2c_gymnax.py --env_id CartPole-v1 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_HPO_A2C --optimize_config;
./run_ppo.sh 5 python a2c_gymnax.py --env_id Acrobot-v1 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_HPO_A2C --optimize_config;
./run_ppo.sh 8 python a2c.py --env_id LunarLander-v2 --track --gpu_number 1 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_HPO_A2C --optimize_config;

./run_ppo.sh 5 python a2c_gymnax.py --env_id Pendulum-v1 --track --gpu_number 3 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_HPO_A2C --optimize_config --action_type continuous;
./run_ppo.sh 5 python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --gpu_number 3 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_HPO_A2C --optimize_config --action_type continuous;

./run_ppo.sh 4 python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_useBest_A2C --optimize_config;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_useBest_A2C --optimize_config;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_useBest_A2C --optimize_config;

./run_ppo.sh 4 python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_useBest_A2C --optimize_config;
./run_ppo.sh 4 python a2c.py --env_id MiniGrid-DistShift1-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor sdt --run_name SDT_HPO --exp_name results_useBest_A2C --optimize_config;

________________________________________________________       MLP      ____________________________________________________________________

./run_ppo.sh 4 python a2c_gymnax.py --env_id CartPole-v1 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_HPO_A2C --optimize_config;
./run_ppo.sh 4 python a2c_gymnax.py --env_id Acrobot-v1 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_HPO_A2C --optimize_config;
./run_ppo.sh 6 python a2c.py --env_id LunarLander-v2 --track --gpu_number 1 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_HPO_A2C --optimize_config;

./run_ppo.sh 6 python a2c_gymnax.py --env_id Pendulum-v1 --track --gpu_number 2 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_HPO_A2C --optimize_config --action_type continuous;
./run_ppo.sh 6 python a2c_gymnax.py --env_id MountainCarContinuous-v0 --track --gpu_number 3 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_HPO_A2C --optimize_config --action_type continuous;

./run_ppo.sh 4 python a2c.py --env_id MiniGrid-Empty-Random-6x6-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_useBest_A2C --optimize_config;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-DoorKey-5x5-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_useBest_A2C --optimize_config;
./run_ppo.sh 3 python a2c.py --env_id MiniGrid-LavaGapS5-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_useBest_A2C --optimize_config;
./run_ppo.sh 4 python a2c.py --env_id MiniGrid-LavaGapS7-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_useBest_A2C --optimize_config;
./run_ppo.sh 4 python a2c.py --env_id MiniGrid-DistShift1-v0 --track --gpu_number 0 --random_trials 5 --n_trials 60 --actor mlp --run_name MLP_HPO --exp_name results_useBest_A2C --optimize_config;
