# Agent definition. `class` is a dotted path to the agent implementation and
# `params` holds its settings (presumably forwarded as constructor keyword
# arguments -- confirm against the code that instantiates this config).
agent:
  name: gmm_ssac
  class: algorithm.gmm_ssac.GMMSSACAgent
  params:
    # `???` leaves the value unset in this file; it has to be filled in
    # before the agent is built.
    obs_dim: ??? # to be specified later
    action_dim: ??? # to be specified later
    action_range: ??? # to be specified later
    # `device` is not defined in this file; it must come from elsewhere in the
    # composed config.
    device: '${device}'
    # Sub-configs: each interpolation points at a top-level section of this
    # file with the same name.
    critic_cfg: '${double_q_critic}'
    safety_critic_cfg: '${safety_critic}'
    actor_cfg: '${diag_gaussian_actor}'
    discount: 0.99
    init_temperature: 0.6931
    # Learning rates are written with an explicit decimal point in the
    # mantissa: YAML 1.1 loaders (e.g. PyYAML) resolve `1e-3` as a string,
    # whereas `1.0e-3` is a float under every common loader.
    alpha_lr: 1.0e-3
    alpha_betas: [0.9, 0.999]
    actor_lr: 3.0e-4
    actor_betas: [0.9, 0.999]
    actor_update_frequency: 1
    critic_lr: 1.0e-3
    critic_betas: [0.9, 0.999]
    critic_tau: 0.005
    critic_target_update_frequency: 2
    batch_size: 256
    learnable_temperature: true
    cost_limit: 10
    max_episode_len: 1000
    # `risk_level` is not defined in this file; it must come from elsewhere in
    # the composed config.
    risk_level: '${risk_level}'
    damp_scale: 10
    lr_scale: 1

# Settings for `algorithm.critic.DoubleQCritic`; pulled into the agent through
# `agent.params.critic_cfg`.
double_q_critic:
  class: algorithm.critic.DoubleQCritic
  params:
    # Both dimensions mirror the agent's values, so they are set in one place.
    obs_dim: '${agent.params.obs_dim}'
    action_dim: '${agent.params.action_dim}'
    # Presumably hidden-layer width and number of hidden layers -- confirm
    # against the class.
    hidden_dim: 256
    hidden_depth: 2

# Settings for `algorithm.critic.SafetyCriticGMM`; pulled into the agent
# through `agent.params.safety_critic_cfg`.
safety_critic:
  class: algorithm.critic.SafetyCriticGMM
  params:
    # Both dimensions mirror the agent's values, so they are set in one place.
    obs_dim: '${agent.params.obs_dim}'
    action_dim: '${agent.params.action_dim}'
    # Presumably hidden-layer width and number of hidden layers -- confirm
    # against the class.
    hidden_dim: 256
    hidden_depth: 2

# Settings for `algorithm.actor.DiagGaussianActor`; pulled into the agent
# through `agent.params.actor_cfg`.
diag_gaussian_actor:
  class: algorithm.actor.DiagGaussianActor
  params:
    # Both dimensions mirror the agent's values, so they are set in one place.
    obs_dim: '${agent.params.obs_dim}'
    action_dim: '${agent.params.action_dim}'
    # Presumably number of hidden layers and hidden-layer width -- confirm
    # against the class.
    hidden_depth: 2
    hidden_dim: 256
    # Two-element [lower, upper] pair; presumably clamps the policy's log
    # standard deviation -- confirm against the class.
    log_std_bounds: [-20, 2]