#!/bin/bash

# Runs four Lunar Lander launcher scripts back to back (adadqn, searldqn,
# rsdqn, dehbdqn), each in the foreground, so the next one only starts once
# the previous one has returned.
#
# The launcher paths are relative: invoke this script from the directory that
# contains launch_job/.
#
# There is no `set -e` here, so a launcher that exits non-zero does not stop
# the remaining launchers from being started.

# Flags passed unchanged to every launcher below. Kept as an array and always
# expanded as "${SHARED_ARGS[@]}" so each flag/value reaches the launcher as
# its own argument without depending on unquoted word splitting (and without
# being exposed to pathname expansion).
SHARED_ARGS=(
    --first_seed 1 --last_seed 1
    --replay_buffer_capacity 10_000
    --batch_size 32
    --update_horizon 1
    --gamma 0.99
    --horizon 1_000
    --n_training_steps_per_epoch 10_000
    --update_to_data 1
    --target_update_frequency 200
    --n_initial_samples 1_000
    --mlp_n_layers_range 1 3
    --mlp_n_neurons_range 25 200
    --learning_rate_range 5 2
)

# local_adadqn.sh run: 50 epochs, 5 networks, elitism exploitation.
launch_job/lunar_lander/local_adadqn.sh --experiment_name hp_update_freq_10000_elitism_no_reset \
    --disable_wandb "${SHARED_ARGS[@]}" --n_epochs 50 --n_networks 5 --exploitation_type elitism --epsilon_end 0.01 \
    --epsilon_duration 1_000 --hp_update_frequency 10000

# local_searldqn.sh run: 50 epochs, 5 networks, elitism exploitation.
# Unlike the other three runs, no --epsilon_end / --epsilon_duration is passed.
launch_job/lunar_lander/local_searldqn.sh --experiment_name min_steps_eval_2000_elitism_reset_full_train \
    --disable_wandb "${SHARED_ARGS[@]}" --n_epochs 50 --n_networks 5 --exploitation_type elitism --min_steps_evaluation 2000

# local_rsdqn.sh run: 300 epochs.
launch_job/lunar_lander/local_rsdqn.sh --experiment_name ne30 "${SHARED_ARGS[@]}" --n_epochs 300 --epsilon_end 0.01 \
    --disable_wandb --epsilon_duration 1_000 --hp_update_per_epoch 30

# local_dehbdqn.sh run: 300 epochs.
launch_job/lunar_lander/local_dehbdqn.sh --experiment_name minne10_maxne50 "${SHARED_ARGS[@]}" --n_epochs 300 --epsilon_end 0.01 \
    --disable_wandb --epsilon_duration 1_000 --min_n_epochs_per_hp 10 --max_n_epochs_per_hp 50