# Hydra defaults list: other configs that are composed together with this file.
defaults:
  # Base config referenced by absolute path; the `@_here_` package places its
  # content in this config's own package.
  - /agent/vce@_here_
  # Replace the already-selected `dynamics_model` option with this one.
  - override dynamics_model: variational_reward_model
# Disabled: alternative `actor` override, kept for reference.
#  - override actor: two_heads

# Class named by Hydra's `_target_` convention. The meaning of every option
# below is defined by that class, not by this file.
_target_: erl_lib.agent.ve_q.VariationalValueGradient
# Disabled alternative target, kept for reference.
# _target_: erl_lib.agent.ve_q.SoftDistValueGradient
# Written with an explicit mantissa (`1.0e-20`, not `1e-20`) so that YAML 1.1
# loaders such as stock PyYAML also resolve it as a float, not a string.
beta_min: 1.0e-20
v_lr_ratio: 1.0
lr_norm: 0.1
# mvve_improvement: true
max_beta_inc: 2.0
# NOTE(review): presumably selects a beta update rule implemented by the
# target class -- confirm the accepted values there.
beta_update: relu_std
beta_init: 1.0
lr_beta: 0.1
target_improvement: 0.25
extreme_epi_q: false
mve_horizon: 1
discount_beta: false
# mean_improve: true
kl_improve: true
sample_std: false
# detached_std: false
smoothed_baseline: true
episode_wise_norm: false