# Hyperparameters for a single training run. Key meanings below are inferred
# from the key names only -- confirm against the script that loads this file.

# Layer sizes, kept as comma-separated text: each value loads as one string
# scalar ("128,128"), so the consumer is presumably expected to split it.
# NOTE(review): "ac" / "d" look like actor-critic / discriminator -- confirm.
ac_layer_dims: 128,128
d_layer_dims: 128,128

# Written with an explicit decimal point on purpose: YAML 1.1 loaders such as
# PyYAML resolve a bare `3e-4` to the string "3e-4" rather than a float.
lr: 3.0e-4
num_steps: 10
gamma: 0.99
mini_batch_size: 5
ppo_epochs: 4
threshold_reward: 10

# Canonical lowercase booleans (`true` / `false`) load identically under
# YAML 1.1 and 1.2.
use_actions: true
discriminator_type: airl
use_irm: true

max_frames: 500000
# NOTE(review): newer Gym releases register this environment as Pendulum-v1;
# keep v0 only if the pinned Gym version still provides it -- confirm.
env_name: Pendulum-v0