[ppo]
; Settings read from the [ppo] section.
; NOTE(review): the per-key notes below are inferred from key names only;
; confirm them against the code that loads this section.
; Several values (torch.relu, torch.tanh, True) are written as Python
; expressions, so the loader presumably evaluates them - TODO confirm, and
; keep every value a valid expression if so.

; presumably weights of the entropy and critic terms in the loss - verify
entropy_coef = 1e-2
critic_coef = 0.5
; separate rate keys for actor and critic (currently the same value)
actor_lr = 3e-4
critic_lr = 3e-4
hidden_dim = 256
; presumably discount factor and GAE lambda - verify; the trailing underscore
; in lambda_ is part of the key name and must match what the loader expects
gamma = 0.99
lambda_ = 0.95
; presumably the PPO clipping range - verify
max_clip = 0.2
; units (steps, samples, passes) are not stated here - confirm with the trainer
train_epoch = 5
traj_length = 5000
batch_size = 64
max_grad_norm = 0.5
layer_num = 3
; dotted names refer to the torch namespace; the loader must have it available
; if these are evaluated - TODO confirm
activation_function = torch.relu
last_activation = torch.tanh
trainable_std = True
; the only section in this file with on_policy = True
on_policy = True

[sac]
; Settings read from the [sac] section.
; NOTE(review): the per-key notes below are inferred from key names only;
; confirm them against the code that loads this section.
; Values such as torch.relu, None and True are written as Python expressions,
; so the loader presumably evaluates them - TODO confirm.

; presumably the starting value of the entropy temperature, with alpha_lr
; (below) as its own rate - verify
alpha_init = 0.2
gamma = 0.99
; three separate rate keys (q, actor, alpha), currently all the same value
q_lr = 3e-4
actor_lr = 3e-4
alpha_lr = 3e-4
; presumably the target-network soft update coefficient - verify
soft_update_rate = 0.005
hidden_dim = 256
; presumably the number of stored samples required before learning - verify
learn_start_size = 1000
; written in float notation (1e+6); NOTE(review): if the consumer needs an
; integer size, confirm that it casts this value
memory_size = 1e+6
batch_size = 64
layer_num = 3
activation_function = torch.relu
; None here, unlike [ppo] and [ddpg], which both use torch.tanh
last_activation = None
trainable_std = True
on_policy = False

[ddpg]
; Settings read from the [ddpg] section.
; This section has the same keys as [sac] except that alpha_init and alpha_lr
; are absent; the values differ only in last_activation and trainable_std.
; NOTE(review): the per-key notes below are inferred from key names only;
; confirm them against the code that loads this section.
; Values such as torch.relu and False are written as Python expressions, so
; the loader presumably evaluates them - TODO confirm.

gamma = 0.99
q_lr = 3e-4
actor_lr = 3e-4
; presumably the target-network soft update coefficient - verify
soft_update_rate = 0.005
hidden_dim = 256
; presumably the number of stored samples required before learning - verify
learn_start_size = 1000
; written in float notation (1e+6); NOTE(review): if the consumer needs an
; integer size, confirm that it casts this value
memory_size = 1e+6
batch_size = 64
layer_num = 3
activation_function = torch.relu
last_activation = torch.tanh
; the only section in this file with trainable_std = False
trainable_std = False
on_policy = False
