# @package _global_

# Agent configuration: `_target_` names the class to build and the remaining
# keys are the hyperparameters set for it. Notes marked "presumably" are
# assumptions about the consuming code and should be confirmed there.
agent:
  name: sac
  _target_: agent.sac.SAC
  # `???` is OmegaConf's marker for a mandatory value with no default here.
  obs_dim: ??? # to be specified later
  action_dim: ??? # to be specified later

  # Interpolations of the top-level `q_net` and `diag_gaussian_actor` nodes,
  # neither of which is defined in this file.
  critic_cfg: ${q_net}
  actor_cfg: ${diag_gaussian_actor}
  init_temp: 1e-2 # use a low temp for IL

  # Temperature (alpha) optimizer settings.
  # presumably `*_betas` are Adam (beta1, beta2) pairs — confirm in the agent.
  alpha_lr: 3e-4
  alpha_betas: [0.9, 0.999]

  # Actor optimizer settings and update cadence.
  actor_lr: 3e-4
  actor_betas: [0.9, 0.999]
  actor_update_frequency: 1

  # Critic optimizer settings and target-network update cadence.
  # presumably `critic_tau` is the soft (Polyak) update coefficient — confirm.
  critic_lr: 3e-4
  critic_betas: [0.9, 0.999]
  critic_tau: 0.005
  critic_target_update_frequency: 1

  # learn temperature coefficient (disabled by default)
  learn_temp: false

  # Use either value_dice actor or normal SAC actor loss
  vdice_actor: false

  # Network shape settings.
  # presumably `log_std_bounds` is [min, max] for the actor's log-std — confirm.
  hidden_dim: 256
  hidden_depth: 2
  log_std_bounds: [-5, 2]

# Run-level settings. Booleans use the canonical lowercase `true`/`false`.
cuda_deterministic: false
# `???` is OmegaConf's marker for a mandatory value with no default here.
device: ??? # to be specified later

gamma: 0.99 # presumably the discount factor — confirm in the agent code
seed: 0
pretrain: null # unset by default

num_seed_steps: 0 # Don't need seeding for IL (Use 1000 for RL)
only_expert_states: false

# Training settings; both switches are off by default.
train:
  batch: 32 # presumably the minibatch size — confirm in the training loop
  use_target: false
  soft_update: false

# Expert data settings.
# presumably `demos` is the number of expert demonstrations to use and
# `subsample_freq` the stride used when subsampling them — confirm in the loader.
expert:
  demos: 1
  subsample_freq: 1
# Evaluation settings. Unset values are written as an explicit `null` rather
# than a bare `key:` so the intent is unambiguous.
eval:
  policy: null # unset by default
  threshold: null # unset by default
  use_baselines: false
  eps: 10 # presumably the number of evaluation episodes — confirm
  transfer: false
  expert_env: '' # empty string (not null) by default

# Learning-method settings. The keys are consumed by code outside this file;
# notes marked "presumably" are assumptions to confirm there.
method:
  type: iq
  loss: value

  # Optional switches, all off by default.
  constrain: false
  grad_pen: false
  chi: false
  tanh: false
  regularize: false
  div: null # unset by default

  alpha: 0.5
  lambda_gp: 10 # presumably the weight used when `grad_pen` is on — confirm
  mix_coeff: 1

# Do offline learning (disabled by default)
offline: false
# Number of actor updates per env step
num_actor_updates: 1