# Baseline settings. Presumably each [[runs]] entry inherits these and
# overrides individual keys; the merge is done by the consuming code, which is
# not part of this file -- TODO confirm.
[defaults]
size = 18
seeds = [42, 43, 44, 45, 46]
batch_size = 1024
eps = 0.1
epochs = 4000
device = "cpu"

# Unprefixed learning rates (pf / pb / logz). logz uses a 10x larger rate than
# pf and pb in every group below as well.
lr_pf = 5e-3
lr_pb = 5e-3
lr_logz = 5e-2

# div_-prefixed learning rates.
# NOTE(review): threshold sits in this group but carries no div_ prefix --
# confirm which component reads it.
div_lr_pf = 5e-3
div_lr_pb = 5e-3
div_lr_logz = 5e-2
threshold = 0.2

# teacher_-prefixed learning rates and constants.
teacher_lr_pf = 5e-3
teacher_lr_pb = 5e-3
teacher_lr_logz = 5e-2
teacher_C = 19.0
teacher_eps = 1e-8
teacher_alpha = 0.0

# sa_-prefixed learning rates, plus rnd_lr.
sa_lr_pf = 5e-3
sa_lr_pb = 5e-3
sa_lr_logz = 5e-2
rnd_lr = 5e-3

# Logging / evaluation / checkpoint cadence.
# NOTE(review): the unit (epochs vs. steps) is not stated here -- confirm.
log_every = 10
eval_every = 100
save_every = 1000
marginal_batch = 15

# Network shape keys with the _f suffix.
hidden_dim_f = 128
num_layers_f = 3
n_freq_f = 8

# Network shape keys with the _b suffix.
# NOTE(review): num_layers_b is 1 while num_layers_f is 3 -- confirm the
# asymmetry is intended.
hidden_dim_b = 128
num_layers_b = 1
n_freq_b = 8

# div_ network shape keys, _f suffix.
# NOTE(review): div_n_freq_f is 16 while every other n_freq key is 8 --
# confirm this is intended.
div_hidden_dim_f = 64
div_num_layers_f = 2
div_n_freq_f = 16

# div_ network shape keys, _b suffix.
div_hidden_dim_b = 64
div_num_layers_b = 2
div_n_freq_b = 8

# eps repeats the [defaults] value (0.1); eps and reward_alpha both match the
# values encoded in run_id.
[[runs]]
run_id = "run_8g_reward_alpha_0.25_eps_0.1"
reward_kind = "8g"
eps = 0.1
reward_alpha = 0.25

# reward_alpha is written as a float so it parses to the same type as the 0.25
# used by sibling runs (a bare 1 is a TOML integer, not a float).
[[runs]]
run_id = "run_8g_reward_alpha_1.0_eps_0.1"
reward_kind = "8g"
eps = 0.1
reward_alpha = 1.0

# reward_alpha must agree with the value encoded in run_id (0.25); with any
# other value this entry would be mislabeled.
[[runs]]
run_id = "run_rings_reward_alpha_0.25_eps_0.1"
reward_kind = "rings"
eps = 0.1
reward_alpha = 0.25

# reward_alpha is written as a float so it parses to the same type as the 0.25
# used by sibling runs (a bare 1 is a TOML integer, not a float).
[[runs]]
run_id = "run_rings_reward_alpha_1.0_eps_0.1"
reward_kind = "rings"
eps = 0.1
reward_alpha = 1.0


######## old runs below ########
# Sets only reward_kind; no other key is overridden in this entry.
[[runs]]
run_id = "run_8g"
reward_kind = "8g"

# Sets only reward_kind; no other key is overridden in this entry.
[[runs]]
run_id = "run_rings"
reward_kind = "rings"

# eps differs from the [defaults] value (0.1).
[[runs]]
run_id = "run_rings_v2"
reward_kind = "rings"
eps = 0.2

# eps and threshold both differ from [defaults] (0.1 and 0.2 there).
[[runs]]
run_id = "run_rings_v3"
reward_kind = "rings"
eps = 0.3
threshold = 0.3

# eps differs from the [defaults] value (0.1); reward_alpha has no entry in
# [defaults], so it is only ever set per run.
[[runs]]
run_id = "run_rings_v4"
reward_kind = "rings"
eps = 0.2
reward_alpha = 0.25

# Same as run_rings_v4 except eps (0.1 here, 0.2 there).
[[runs]]
run_id = "run_rings_v5"
reward_kind = "rings"
eps = 0.1
reward_alpha = 0.25


# Same as run_rings_v5 except reward_alpha. Written as a float so it parses to
# the same type as the 0.25 used by other runs (a bare 1 is a TOML integer).
[[runs]]
run_id = "run_rings_v6"
reward_kind = "rings"
eps = 0.1
reward_alpha = 1.0

# Same reward_kind, eps, and reward_alpha as the entry with run_id
# "run_8g_reward_alpha_0.25_eps_0.1"; only the run_id spelling differs.
# NOTE(review): confirm whether both entries are still needed.
[[runs]]
run_id = "run_8g_reward_alpha0.25_eps0.1"
reward_kind = "8g"
eps = 0.1
reward_alpha = 0.25