; Hyperparameters for the [ppo] section (the name suggests Proximal Policy Optimization).
; NOTE(review): values such as torch.relu and True look like Python expressions, so the
; loader presumably evaluates them instead of reading plain strings. Confirm against the
; consuming code before changing value syntax or adding inline comments.
; The per-key notes below are inferred from key names only; verify against the trainer.
[ppo]
; Presumably the weight of the entropy bonus in the loss.
entropy_coef = 1e-2
; Presumably the weight of the critic (value) loss term.
critic_coef = 0.5
; Presumably the learning rates of the actor and critic optimizers.
actor_lr = 3e-4
critic_lr = 3e-4
; Presumably the width of each hidden layer.
hidden_dim = 256
; Presumably the reward discount factor.
gamma = 0.99
; Presumably the GAE lambda; the trailing underscore likely avoids the Python keyword.
lambda_ = 0.95
; Presumably the clipping range of the PPO probability ratio.
max_clip = 0.2
; Presumably the number of optimization passes over each collected rollout.
train_epoch = 1
; Presumably the number of environment steps collected per update.
traj_length = 500
; Presumably the minibatch size used during optimization.
batch_size = 64
; Presumably the gradient-norm clipping threshold.
max_grad_norm = 0.5
; Presumably the number of layers in the networks.
layer_num = 3
; Dotted torch names; presumably the hidden-layer and output-layer activations.
activation_function = torch.relu
last_activation = torch.tanh
; Presumably whether the action standard deviation is a learned parameter.
trainable_std = True
; Presumably marks the algorithm as on-policy (no replay of old data).
on_policy = True
; NOTE(review): the purpose of lam and b is not evident from this file. lam is a
; separate key from lambda_ above; confirm what each controls in the consuming code.
lam = 0.1
b = 50
