# Hydra-style defaults list: pulls in the `/agent/dhmbpo` config and places
# it in this file's own package (`@_here_`).
# NOTE(review): whether the keys in this file override the included config
# or the other way round depends on the Hydra version / `_self_` placement
# -- confirm.
defaults:
  - /agent/dhmbpo@_here_

# Top-level scalar settings.
# NOTE(review): meanings below are inferred from key names only -- confirm
# against the code that consumes this config.
# Fractional value; presumably fewer than one policy update per step.
num_policy_opt_per_step: 0.1
max_batch_size: 3000
# Both iteration counts are set to the same value (5).
seed_iters: 5
iters_per_epoch: 5
# Settings grouped under `model_train`.
# NOTE(review): presumably governs fitting of the learned dynamics model --
# confirm against the consumer.
model_train:
  # Nested `init` group; both thresholds are set to the same value.
  init:
    improvement_threshold: 0.001
    keep_threshold: 0.001
  val_batch_size: 256
  base_epochs: 1
# Settings grouped under `dynamics_model`.
# NOTE(review): semantics are inferred from key names -- confirm against the
# consumer.
dynamics_model:
  # Presumably the ensemble size -- TODO confirm.
  num_members: 4
  # Three ratios; presumably one per layer -- TODO confirm the expected
  # length.
  weight_decay_ratios:
    - 0.1
    - 0.1
    - 1
  dim_hidden: 16
  # Explicit null instead of a bare empty value: parses identically, but
  # makes it clear the key is intentionally unset rather than a mapping that
  # lost its children.
  uncertainty_bonus: null
# Rollout / critic scalar settings.
# NOTE(review): semantics are inferred from key names -- confirm against the
# consumer.
# All three horizon values in this stanza are set to 3.
rollout_horizon: 3
# Canonical lowercase boolean: `False` is only a boolean under YAML 1.1-style
# loaders, while `false` is a boolean under every schema.
warm_start: false
training_rollout_horizon: 3
mve_horizon: 3
rollout_freq: 50
num_critic_iter: 1