# Settings for the policy-update component. `_target_` and `${...}` follow the
# Hydra/OmegaConf conventions (class path to instantiate / value interpolation).
agent_cfg:
  _target_: alg.policy.policy_updates.PolicyUpdates
  # NOTE(review): `diag_gaussian_actor` is written at the top level of this
  # file, so this path only resolves if the file is composed under an
  # `agent_cfg` package -- confirm against the config-group layout.
  actor_cfg: ${agent_cfg.diag_gaussian_actor}
  # Explicit mantissa dot: YAML 1.1 loaders such as stock PyYAML read a bare
  # `3e-4` as a string, not a float.
  actor_lr: 3.0e-4
  actor_lr_min: 0.000001
  # `num_train_steps` is not defined in this file; it has to be supplied by the
  # configuration this file is composed into.
  actor_lr_step_max: ${num_train_steps}
  actor_betas: [0.9, 0.99]
  # NOTE(review): -1 looks like a sentinel value -- confirm its meaning in
  # PolicyUpdates.
  batch_size: -1
  sub_batch_size: 4096
  train_gradient_update: 1
  log_steps: 250
  normalize_reward: true
  sa_policy: false
  # Loss selection: `name` picks the loss, `params` holds its settings.
  # NOTE(review): presumably `params` is forwarded to the selected loss --
  # confirm in PolicyUpdates.
  loss_fn:
    name: eff_sample
    params:
      norm_weights: true
      target_eff: 0.1
  
# Settings for the DiagGaussianActor class named in `_target_`.
# NOTE(review): `agent_cfg.actor_cfg` interpolates
# `${agent_cfg.diag_gaussian_actor}`, while this key sits at the top level of
# the file -- confirm the composed package path makes the two line up.
diag_gaussian_actor:
  _target_: alg.policy.actor_model.DiagGaussianActor
  hidden_depth: 2
  hidden_dim: 512
  # NOTE(review): presumably the initial standard deviation of the Gaussian
  # policy -- confirm in the actor class.
  init_std: 1.0
  # NOTE(review): presumably a dropout probability -- confirm in the actor class.
  dropout: 0.25
