# Algorithm configuration: `classname` names the class to use and `params`
# holds its settings.
# NOTE(review): presumably `params` is passed to the class constructor —
# confirm against the config loader.
classname: crl.algorithms.SAC
params:
  # Device strings for three roles; all three point at the same device here.
  # NOTE(review): the key names suggest gradient updates, environment
  # interaction, and replay-buffer storage respectively — confirm.
  learning_device: cuda:0
  acquisition_device: cuda:0
  buffer_device: cuda:0

  # Optimizer specs: each entry gives an optimizer `classname` plus its
  # settings (here only `lr`).
  # NOTE(review): presumably one optimizer each for the policy, the Q
  # function(s), and the entropy temperature — inferred from key names.
  #
  # Learning rates are written with an explicit mantissa dot (1.0e-3, not
  # 1e-3): YAML 1.1 loaders such as stock PyYAML only resolve exponent
  # notation to a float when the mantissa contains a ".", and otherwise load
  # the value as a string.
  optimizer_policy:
    classname: torch.optim.Adam
    lr: 1.0e-3

  optimizer_q:
    classname: torch.optim.Adam
    lr: 1.0e-3

  optimizer_entropy:
    classname: torch.optim.Adam
    lr: 3.0e-4

  # NOTE(review): presumably a control (evaluation) phase runs every 100
  # epochs with 1 rollout each time — inferred from key names; confirm.
  control_every_n_epochs: 100
  n_control_rollouts: 1

  # NOTE(review): presumably 0 means no extra worker processes — confirm.
  n_processes: 0

  # NOTE(review): meanings below are inferred from key names — confirm
  # against crl.algorithms.SAC before relying on them.
  reward_scaling: 1.  # float 1.0; presumably a multiplier, so no rescaling
  # The two delays differ (2 vs. 4); units are not visible here.
  policy_update_delay: 2
  target_update_delay: 4
  time_limit: 0  # presumably 0 disables the limit — TODO confirm
  
  # Same value as `buffer_time_size` further down in this file.
  # NOTE(review): presumably the number of consecutive timesteps per sample —
  # confirm.
  n_timesteps: 2
  batch_size: 256

  # NOTE(review): presumably the initial entropy temperature and a multiplier
  # applied to the entropy target — inferred from key names; confirm.
  init_temperature: 2.
  target_multiplier: 2.
  
  clip_grad: 0.  # presumably 0.0 disables gradient clipping — TODO confirm
  # `${...}` is an interpolation: the value is taken from
  # `scenario.n_train_envs`, defined outside this file, and resolved by the
  # config loader (OmegaConf/Hydra-style syntax — confirm).
  inner_epochs: ${scenario.n_train_envs}
  grad_updates_per_step: 0.5
  discount_factor: 0.99
  # NOTE(review): presumably the soft (Polyak) target-update coefficient —
  # confirm.
  update_target_tau: 0.005
  buffer_time_size: 2
  # Underscore digit grouping is YAML 1.1 integer syntax (accepted by
  # PyYAML); a strict YAML 1.2 core-schema parser would load these two values
  # as strings. `initial_buffer_size` is 1% of `buffer_size`.
  buffer_size: 1_280_000
  initial_buffer_size: 12_800