agent:
  name: sac_diverse
  class: agent.sac_diverse.SACAgent_Diverse
  params:
    obs_dim: ??? # to be specified later
    action_dim: ??? # to be specified later
    action_range: ??? # to be specified later
    device: ${device}
    critic_cfg: ${double_q_critic}
    actor_cfg: ${diag_gaussian_actor}
    discount: 0.99
    init_temperature: 0.1
    alpha_lr: 1e-4
    alpha_betas: [0.9, 0.999]
    actor_lr: 1e-4
    actor_betas: [0.9, 0.999]
    actor_update_frequency: 1
    critic_lr: 1e-4
    critic_betas: [0.9, 0.999]
    critic_tau: 0.005
    critic_target_update_frequency: 2
    batch_size: 1024 # 1024 for Walker, 512 for Meta-world
    learnable_temperature: true
    beta_init: 0.5
    wandb: ${wandb}
    
double_q_critic:
  class: agent.critic.DoubleQCritic
  params:
    obs_dim: ${agent.params.obs_dim}
    action_dim: ${agent.params.action_dim}
    hidden_dim: 1024
    hidden_depth: 2
    layernorm: false
    
diag_gaussian_actor:
  class: agent.actor.DiagGaussianActor
  params:
    obs_dim: ${agent.params.obs_dim}
    action_dim: ${agent.params.action_dim}
    hidden_depth: 2
    hidden_dim: 1024
    log_std_bounds: [-5, 2]
