# @package _global_

# SAC agent configuration. `_target_` is the dotted path of the class this
# node describes (agent.sac.SAC). The network sub-configs are pulled in by
# interpolation from the top-level `q_net`, `diag_gaussian_actor` and
# `value_network` nodes of this file.
agent:
  name: sac
  _target_: agent.sac.SAC
  # `???` marks a mandatory value: it must be filled in before it is read.
  obs_dim: ??? # to be specified later
  action_dim: ??? # to be specified later

  # Network configs, interpolated from the top-level nodes of the same name.
  critic_cfg: ${q_net}
  actor_cfg: ${diag_gaussian_actor}
  init_temp: 1e-2 # use a low temp for IL

  # Discriminator config, defined inline; its dimensions follow the agent's.
  disc_cfg:
    _target_: agent.sac_models.Discriminator
    obs_dim: ${agent.obs_dim}
    action_dim: ${agent.action_dim}
    hidden_dim: 256
    hidden_depth: 3
    reward_factor: 1.0

  value_cfg: ${value_network}

  # NOTE(review): the `*_lr` / `*_betas` pairs below look like per-component
  # optimizer settings (learning rate and Adam-style betas) — confirm in
  # agent.sac.SAC.
  alpha_lr: 3e-4
  alpha_betas: [0.9, 0.999]

  disc_lr: 1e-4
  disc_betas: [0.9, 0.999]

  actor_lr: 3e-4
  actor_betas: [0.9, 0.999]
  actor_update_frequency: 1

  critic_lr: 3e-4
  critic_betas: [0.9, 0.999]
  # NOTE(review): presumably the soft target-update coefficient — confirm.
  critic_tau: 0.005
  critic_target_update_frequency: 1

  # No `value_betas` is set here, unlike the other components.
  value_lr: 3e-5

  learn_temp: false

  # Boolean switches, written in canonical lowercase form.
  vdice_actor: false
  pen_bad: false

# Critic network config (agent.sac_models.DoubleQCritic); referenced from
# `agent.critic_cfg` via `${q_net}`.
q_net:
  _target_: agent.sac_models.DoubleQCritic
  # Dimensions are interpolated from the agent node, so they are set in one place.
  obs_dim: ${agent.obs_dim}
  action_dim: ${agent.action_dim}
  # NOTE(review): presumably the hidden-layer width and the number of hidden
  # layers — confirm against DoubleQCritic.
  hidden_dim: 256
  hidden_depth: 3

# Actor network config (agent.sac_models.DiagGaussianActor); referenced from
# `agent.actor_cfg` via `${diag_gaussian_actor}`.
diag_gaussian_actor:
  _target_: agent.sac_models.DiagGaussianActor
  # Dimensions are interpolated from the agent node, so they are set in one place.
  obs_dim: ${agent.obs_dim}
  action_dim: ${agent.action_dim}
  # NOTE(review): presumably the hidden-layer width and the number of hidden
  # layers — confirm against DiagGaussianActor.
  hidden_dim: 256
  hidden_depth: 3
  # NOTE(review): looks like a [lower, upper] range applied to the policy's
  # log standard deviation — confirm against DiagGaussianActor.
  log_std_bounds: [-5, 2]

# Value network config (agent.sac_models.SingleV); referenced from
# `agent.value_cfg` via `${value_network}`.
value_network:
  _target_: agent.sac_models.SingleV
  # Dimensions are interpolated from the agent node, so they are set in one place.
  obs_dim: ${agent.obs_dim}
  action_dim: ${agent.action_dim}
  # NOTE(review): presumably the hidden-layer width and the number of hidden
  # layers — confirm against SingleV.
  hidden_dim: 256
  hidden_depth: 3