# Run configuration. The `defaults` list composes this file (`_self_`) with
# the `lava_2a` option of the `env` config group (Hydra-style syntax).
# NOTE(review): `_self_` is listed first, so under Hydra's ordering rules the
# env config would override same-named keys defined here - confirm intended.
defaults:
  - _self_
  - env: lava_2a

# Algorithm selection.
algo: 'MAGAIL'
use_shadow_reward: true

# Reward weighting and its decay schedule.
# NOTE(review): exact meaning of pdecay / pdecay_interval is defined by the
# consuming code, not visible here - confirm before changing.
pweight: 1  # only use the learned reward for policy training
pdecay: 1
pdecay_interval: 1
use_suboptimal: false

# Policy-update settings (PPO-style, judging by the key names - confirm).
ppo_epoch: 4
num_mini_batch: 4
buffer_size: 4096
clip_eps: 0.2
lambda_entropy: 0.01
value_loss_coef: 0.5
max_grad_norm: 1.0

# Learning rate and discounting.
lr: 0.0001
gamma: 0.99
lambda_gae_adv: 0.98

# Optional features; every toggle is off in this configuration.
# NOTE(review): max_grad_norm and lambda_gae_adv are set above while
# clip_grad and use_gae are false - presumably those values are unused in
# this setup; verify against the trainer.
clip_grad: false
add_noise: false
use_gae: false
use_state_norm: false
use_value_norm: false

# Discriminator options (both off).
discrim_full_state: false
discrim_co_trained: false

# Training budget.
frames: 5000000

# Run bookkeeping.
seed: 0
run: -1
# NOTE(review): a boolean under a key named "interval"; presumably false
# disables periodic saving - confirm the type the consumer expects.
save_interval: false

# Output location for results.
result_path: 'results'
