
# Settings for optim_modules.Reinforce. Each argument is an interpolation of
# the same-named top-level key in this file, so a value only has to be changed
# in one place.
# NOTE(review): `_target_` presumably names the class to instantiate
# (Hydra convention) - confirm against the loader.
optimization_algorithm:
  _target_: optim_modules.Reinforce
  # Disabled arguments, kept for reference:
  # policy: ${policy}
  # gpu: ${gpu}

  # Step size and gradient-norm setting.
  lr: ${lr}
  max_grad_norm: ${max_grad_norm}

  # Coefficient for the KL reference term.
  kl_ref_coeff: ${kl_ref_coeff}

  # Reward post-processing switches.
  rw_norm: ${rw_norm}
  rw_clip: ${rw_clip}


# Default values referenced through ${...} interpolation by the
# optimization_algorithm section and by the name templates below.

# Unused placeholders, kept commented out:
# policy:
# gpu:

# Floats carry an explicit decimal point so that YAML 1.1 loaders (e.g. PyYAML)
# also read them as numbers; a bare `1e-3` is parsed as a string there.
max_grad_norm: 1.0e-3
lr: 2.0e-3
rw_norm: 0
rw_clip: null
kl_ref_coeff: 0

# Short tag built from the reward settings above.
rw_strategy: rN${rw_norm}C${rw_clip}

# Name assembled from the hyper-parameters above.
# NOTE(review): the literal "-r" is followed by rw_strategy, which itself
# starts with "r", so the resolved name contains "-rrN..."; confirm this is
# intended before changing it, since existing run names depend on it.
optim_name: RL-lr${lr}-mGN${max_grad_norm}-klC${kl_ref_coeff}-r${rw_strategy}