# Agent config: composes the `/agent/vce` config and sets the options below
# for the class named in `_target_`.
# NOTE(review): the meaning of each option is defined by
# SoftDistValueGradient, which is not visible from this file — confirm there
# before changing values.
defaults:
  # `@_here_` places the composed `/agent/vce` config in this file's package.
  - /agent/vce@_here_
  # Disabled defaults-list overrides, kept for reference:
#  - override dynamics_model: variational_reward_model
#  - override actor: two_heads

# Dotted import path of the class this config instantiates.
_target_: erl_lib.agent.ve_q.SoftDistValueGradient
# Floats use an explicit mantissa point (`1.0e-20`, not `1e-20`): YAML 1.1
# loaders resolve the dot-less form as a string rather than a float.
beta_min: 1.0e-20
lr_norm: 0.1
# mvve_improvement: true
beta_update: opt
beta_init: 1.0e-20
lr_beta: 0.1
target_improvement: 0.25
mve_horizon: 1
discount_beta: false
mean_improve: true

