# Hydra defaults list: `base_onpolicy` is composed first and this file
# (`_self_`) last, so keys set below override the base config's values.
defaults:
  - base_onpolicy
  - _self_

# Identifier for this agent configuration.
# NOTE(review): the `_sn` suffix presumably refers to the `pre_final_sn`
# option enabled under `agent` below -- confirm.
agent_name: ppo_sn

# PPO agent parameters. The `_target_` keys follow Hydra's instantiate
# convention: each one names the dotted import path of a callable, and its
# sibling keys are passed to that callable as keyword arguments.
agent:
  _target_: ppo.ModularPPO
  # Interpolated from a top-level `device` key that is not defined in this
  # file (presumably supplied by `base_onpolicy` -- TODO confirm).
  device: ${device}
  actor_critic_model:
    _target_: online_actor_critics.DiscreteActorCritic
    modules:
      _target_: custom_networks.make_impala_modules
      # `obs_dims` and `n_actions` are likewise interpolated from keys
      # defined outside this file.
      obs_dims: ${obs_dims}
      n_actions: ${n_actions}
      channels: [16, 32, 32]
      hidden_units: 256
      shared_conv_trunk: true
      shared_fc_head: true
      # NOTE(review): `sn` presumably stands for spectral normalization
      # applied before the final layer -- confirm in `make_impala_modules`.
      pre_final_sn: true
  optimizer:
    # NOTE(review): the path goes through `ppo.th`, presumably torch imported
    # as `th` inside the `ppo` module. How the model parameters reach the
    # optimizer is not visible in this file -- confirm in `ModularPPO`.
    _target_: ppo.th.optim.Adam
    # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML's
    # default resolver) read a dotless `5e-4` as a string, not a float.
    lr: 5.0e-4
    eps: 1.0e-5
  # NOTE(review): the names below suggest the usual PPO loss terms (policy
  # and value clip ranges, entropy and value-loss coefficients, gradient-norm
  # cap); confirm the exact semantics against `ModularPPO`.
  clipping: 0.2
  value_clipping: 0.2
  ent_coeff: 0.01
  val_coeff: 0.5
  max_gradient_norm: 0.5

# NOTE(review): presumably the reward discount factor. It is a top-level key,
# not one of the `agent` arguments listed in this file; where it is consumed
# is not visible here -- confirm against the base config / training code.
gamma: 0.999