# Baseline hyper-parameters.
# NOTE(review): the other top-level sections repeat a subset of these keys and
# look like overrides layered on top of this one — confirm the merge order in
# the config loader.
common:
  # Plain digits instead of `100_0000`: `_` digit separators are only resolved
  # as integers by YAML 1.1 parsers (e.g. PyYAML); a YAML 1.2 core-schema
  # parser loads them as strings. Both values are 1,000,000.
  buffer_size: 1000000
  steps: 1000000
  hidden_size: 256
  hidden_layers: 2
  actor_activation: 'relu'
  critic_activation: 'relu'
  apply_layer_norm: false
  actor_lr: 0.0003
  critic_lr: 0.0003
  gamma: 0.99
  train_ratio: 1
  batch_size: 256
  self_expl_start: 0
  # Explicit null. NOTE(review): presumably the consumer derives a value when
  # this is unset — confirm.
  learning_start: null
  tau: 0.005
  eval_envs: 10
  save_points: 0
  eval_points: 100
  log_points: 500
  eval_episodes: 30
  final_eval_episodes: 100
  reward_scale: 1.0
  gpuid: 0
  seed: 42
  log_trajs: false
  formal: false
  timer: false
# Settings keyed by the name MultiGoalPointMaze.
MultiGoalPointMaze:
  maze_map: 'simple'
  # 100,000 steps. Written without the `_` separator: `10_0000` is an integer
  # only under YAML 1.1 resolvers, and its grouping reads like 10,000.
  steps: 100000
  # start_alpha and end_alpha are equal in this section.
  start_alpha: 1.0
  end_alpha: 1.0
  # Numeric here, whereas the SAC and DACER sections use strings such as '-1d'.
  tar_entropy: 1.0
  beta: 0.8
  reward_type: 'sparse'
  eval_points: 50
  log_trajs: true
  init_log_alpha: 0.5
  eval_episodes: 40
  final_eval_episodes: 80
  ent_est_components: 4
  alpha_update_itv: 1000
  alpha_lr: 0.1
# Settings keyed by the name MultiGoalAntMaze.
MultiGoalAntMaze:
  maze_map: 'medium'
  # 500,000 steps. Written without the `_` separator: `50_0000` is an integer
  # only under YAML 1.1 resolvers, and its grouping reads like 50,000.
  steps: 500000
  beta: 0.5
  reward_type: 'dense'
  eval_points: 50
  log_trajs: true
  init_log_alpha: 0.5
  eval_episodes: 40
  final_eval_episodes: 80
# Settings keyed by the name MarioLevelGen.
MarioLevelGen:
  # Quoted like the other string values in this file; the parsed value is the
  # same string.
  smbgen_style: 'MultiFacet'
  eval_envs: 30
  steps: 500000
  alpha_update_itv: 2000
  alpha_lr: 0.1
# Settings keyed by the name SAC: a single key.
SAC:
  # A string, unlike the numeric `tar_entropy: 1.0` under MultiGoalPointMaze.
  # NOTE(review): the trailing `d` presumably stands for the action
  # dimension — confirm how the consumer parses it.
  tar_entropy: '-1d'
# Settings keyed by the name DrAC0.
DrAC0:
  reg_samples: 8
  # start_alpha and end_alpha are equal in this section.
  # NOTE(review): if alpha is scheduled between them this is a constant
  # schedule — confirm how end_rate_alpha is used.
  start_alpha: 1.0
  end_alpha: 1.0
  end_rate_alpha: 0.5
  z_dim: 16
# Settings keyed by the name DrAC.
DrAC:
  pg_samples: 1
  qt_samples: 1
  reg_samples: 8
  beta: 0.15
  z_dim: 16
  init_log_alpha: 0.0
  z_distribution: 'uniform'
  # Quoted to match z_distribution above; the parsed value is the same string.
  actor_type: 'amortized'
# Settings keyed by the name DACER.
DACER:
  # The Humanoid / HalfCheetah sections set this same key to 0.15.
  l: 0.1
  # A string, like SAC's '-1d'.
  # NOTE(review): the trailing `d` presumably stands for the action
  # dimension — confirm how the consumer parses it.
  tar_entropy: '-0.9d'
  alpha_lr: 0.03
  ent_est_components: 3
  policy_delay: 2
  alpha_update_itv: 10000
  ent_est_samples: 200
  # Differs from the 1.0 set under `common`.
  reward_scale: 0.2
  tanh_out: false
# Reduced-size settings: 2000 steps, batch size 16, 16-unit hidden layers and
# only 2 evaluation episodes / environments.
debug:
  steps: 2000
  batch_size: 16
  log_points: 50
  eval_points: 20
  eval_episodes: 2
  eval_envs: 2
  hidden_size: 16
  # Same value as under `common`.
  hidden_layers: 2
# Preset for the 'simple' maze map.
pm_simple:
  # 100,000 steps. Plain digits: `10_0000` is an integer only under YAML 1.1
  # resolvers, and its grouping reads like 10,000.
  steps: 100000
  maze_map: 'simple'
# Preset for the 'medium' maze map.
pm_medium:
  # 200,000 steps. Plain digits: `20_0000` is an integer only under YAML 1.1
  # resolvers, and its grouping reads like 20,000.
  steps: 200000
  maze_map: 'medium'
# Preset for the 'hard' maze map.
pm_hard:
  # 500,000 steps. Plain digits: `50_0000` is an integer only under YAML 1.1
  # resolvers, and its grouping reads like 50,000.
  steps: 500000
  maze_map: 'hard'
  eval_episodes: 80
  final_eval_episodes: 160
  ent_est_components: 8
# Four sections that each set only `l: 0.15` (the DACER section sets 0.1).
# NOTE(review): the bare names and the `-v5` names presumably refer to the
# same tasks under different ids — confirm, and keep each pair in sync.
Humanoid:
  l: 0.15
HalfCheetah:
  l: 0.15
Humanoid-v5:
  l: 0.15
HalfCheetah-v5:
  l: 0.15
# Preset: 3 hidden layers with 'gelu' for both actor and critic
# (`common` sets 2 layers and 'relu').
layer3:
  hidden_layers: 3
  actor_activation: 'gelu'
  critic_activation: 'gelu'
# Preset: learning rates of 0.0001 (`common` sets 0.0003) and 3 hidden layers.
dacer_fine_tuned:
  actor_lr: 0.0001
  critic_lr: 0.0001
  hidden_layers: 3
  # NOTE(review): actor and critic activations differ here ('mish' vs 'gelu');
  # every other section in this file keeps them equal — confirm this is
  # intended.
  actor_activation: 'mish'
  critic_activation: 'gelu'
  # Same value as policy_delay under DACER.
  policy_delay: 2
