# Base configuration. The other top-level sections of this file list only a
# subset of these keys (plus a few of their own).
# NOTE(review): presumably those sections are applied as named overrides on
# top of this block - confirm against the config loader.
#
# Floats in scientific notation are written with a decimal point and a signed
# exponent (1.0e+6, not 1e6). YAML 1.2 loaders accept both spellings, but
# YAML 1.1 loaders such as PyYAML only resolve the dotted/signed form to a
# float and would load `1e6` as the string "1e6".
defaults:

  # General
  logdir: null
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  deterministic_run: false
  steps: 1.0e+6
  parallel: false
  eval_every: 1.0e+4
  eval_episode_num: 10
  log_every: 1.0e+4
  reset_every: 0
  device: 'cuda:0'
  compile: true
  precision: 32
  debug: false
  expl_gifs: false
  video_pred_log: true

  # Environment
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  grayscale: false
  prefill: 2500
  eval_noise: 0.0
  reward_EMA: true

  # Model
  dyn_cell: 'gru_layer_norm'
  dyn_hidden: 512

  # Two parallel groups of dynamics sizes, prefixed z_ and c_.
  z_dyn_deter: 512
  z_dyn_stoch: 32
  z_dyn_discrete: 32
  c_dyn_deter: 512
  c_dyn_stoch: 32
  c_dyn_discrete: 32
  z_infer_scale: 1.0

  dyn_input_layers: 1
  dyn_output_layers: 1
  dyn_rec_depth: 1
  dyn_shared: false
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  dyn_temp_post: true
  grad_heads: ['decoder', 'reward', 'cont']
  units: 512
  reward_layers: 2
  cont_layers: 2
  value_layers: 2
  actor_layers: 2
  act: 'SiLU'
  norm: 'LayerNorm'
  # NOTE(review): mlp_keys / cnn_keys are presumably regular expressions
  # matched against observation key names; '$^' matches no non-empty name.
  # Confirm against the encoder/decoder implementation.
  encoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: 'LayerNorm'
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 2
    mlp_units: 512
    symlog_inputs: true
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: 'LayerNorm'
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 2
    mlp_units: 512
    cnn_sigmoid: false
    image_dist: 'mse'
    vector_dist: 'symlog_mse'
  value_head: 'symlog_disc'
  reward_head: 'symlog_disc'

  # Scale / free-threshold settings for the z_ group.
  z_dyn_scale: 0.5
  z_rep_scale: 0.1
  z_kl_free: 1.0

  # Same settings for the c_ group.
  c_dyn_scale: 0.5
  c_rep_scale: 0.1
  c_kl_free: 1.0

  cont_scale: 1.0
  reward_scale: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  action_unimix_ratio: 0.01
  initial: 'zeros'

  # Training
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1.0e-4
  opt_eps: 1.0e-8
  grad_clip: 1000
  value_lr: 3.0e-5
  actor_lr: 3.0e-5
  ac_opt_eps: 1.0e-5
  value_grad_clip: 100
  actor_grad_clip: 100
  dataset_size: 1000000
  slow_value_target: true
  slow_target_update: 1
  slow_target_fraction: 0.02
  opt: 'adam'

  # Behavior
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  imag_sample: true
  actor_dist: 'normal'
  actor_entropy: 3.0e-4
  actor_state_entropy: 0.0
  actor_init_std: 1.0
  actor_min_std: 0.1
  actor_max_std: 1.0
  actor_temp: 0.1
  expl_amount: 0.0
  eval_state_mean: false
  collect_dyn_sample: true
  behavior_stop_grad: true
  value_decay: 0.0
  future_entropy: false

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: true
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: false


# Settings for the 'dmcbg_walker_walk' task.
# `difficulty`, `dynamic` and `datapath` do not appear under `defaults`;
# `envs` differs from the `defaults` value (4 here, 1 there); the remaining
# keys repeat the values `defaults` already has.
# NOTE(review): presumably applied as an override on top of `defaults` -
# confirm against the config loader.
dmc_vision_bg:
  task: 'dmcbg_walker_walk'
  difficulty: 'hard'
  dynamic: true
  # NOTE(review): 'nothing' looks like a placeholder - confirm what the
  # consumer expects here.
  datapath: 'nothing'
  # Written with a decimal point and signed exponent so that YAML 1.1 loaders
  # (e.g. PyYAML) also resolve it to a float; a bare `1e6` loads as a string
  # there.
  steps: 1.0e+6
  action_repeat: 2
  envs: 4
  train_ratio: 512
  video_pred_log: true
  encoder: {mlp_keys: '$^', cnn_keys: 'image'}
  decoder: {mlp_keys: '$^', cnn_keys: 'image'}


# Small-scale settings: turns the `debug` flag on and shrinks the
# pretrain / prefill / batch values relative to `defaults`.
debug:
  debug: true
  # `defaults` uses 100.
  pretrain: 1
  # `defaults` uses 2500.
  prefill: 1
  # `defaults` uses 16.
  batch_size: 10
  # `defaults` uses 64.
  batch_length: 20
