# Run-wide settings that are not specific to the environment or the learner.
[general]
# NOTE(review): "auto" presumably lets the consuming code choose the compute
# device itself; confirm the accepted values against the config loader.
device = "auto"

# Settings for the optimisation environment the agent interacts with.
[environment]
# NOTE(review): population_size / max_generations read like the parameters of
# an evolutionary algorithm wrapped by the environment; confirm with the
# environment implementation.
population_size = 15
max_generations = 100
# NOTE(review): [policy].actor_input_dim and critic_input_dim are also 2;
# if they are derived from one of the two values below, keep them in sync.
nr_objectives = 2
nr_actions = 2
# Names of the problems to use. NOTE(review): these look like the DTLZ
# multi-objective benchmark family; verify how the loader resolves each name.
problems = ['dtlz1','dtlz2','dtlz3','dtlz4']

# Dimensions of the actor and critic networks.
[policy]
# NOTE(review): both input dims equal [environment].nr_objectives and
# nr_actions (2); presumably they must match the observation size, so
# confirm which setting they have to follow before changing either.
actor_input_dim = 2
actor_hidden_dim = 64
critic_input_dim = 2
critic_hidden_dim = 64

# PPO training hyperparameters.
# NOTE(review): several key names (eps_clip, dual_clip, value_clip,
# step_per_epoch, episode_per_collect, ...) resemble the arguments of
# Tianshou's PPO policy and on-policy trainer; confirm against the code that
# consumes this table.
[ppo]
# NOTE(review): the label mentions only dtlz1, while [environment].problems
# lists dtlz1 through dtlz4; check whether the label should be more general.
training_comment = "PPO_dtlz1"
seed = 0
# NOTE(review): both buffer_size (here) and replay_buffer_size (below) are
# defined with different values; verify which one the trainer actually reads.
buffer_size = 4096
batch_size = 64
learning_rate = 1e-3
lr_decay = true
gamma = 0.99
max_epoch = 12
nr_of_environments = 1
step_per_epoch = 1000
episode_per_collect = 10
replay_buffer_size = 10000
gae_lambda = 0.95
max_grad_norm = 0.5
vf_coef = 0.25
ent_coef = 0.0
reward_normalization = true
action_scaling = true
action_bound_method = "clip"
eps_clip = 0.2
value_clip = false
# TOML has no null value, so "disabled" is written here as the string 'None'.
# NOTE(review): the loader must translate this string itself; confirm it does,
# otherwise the literal text 'None' reaches the algorithm.
dual_clip = 'None'
# NOTE(review): the next two flags are integers (0 / 1) whereas the other
# switches in this table are booleans; confirm the loader accepts integers
# before converting them to true / false.
advantage_normalization = 0
recompute_advantage = 1
