defaults:
  # Baseline value for every setting. The other top-level sections of this
  # file (dmc_proprio, dmc_vision, metaworld_mt10, metaworld_mt50, debug)
  # repeat a subset of these keys with different values; presumably the loader
  # layers a selected section over these defaults - confirm in the loader.

  logdir: null
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  deterministic_run: False
  steps: 1e6
  parallel: False
  eval_every: 1e4
  eval_episode_num: 10
  log_every: 1e4
  reset_every: 0
  device: 'cuda:0'
  compile: True
  precision: 32
  debug: False
  video_pred_log: True
  # [todo] start
  add_token_embed: False
  multi_actor: True
  multi_actor_mode: "distinct"
  # thread: multi-threading in _policy; stream: cuda.stream in _policy (needs
  # enough CUDA memory as the number of envs grows); None: no speed-up.
  multi_actor_sample: "thread"
  # stream: cuda.stream in _train (needs enough CUDA memory as the number of
  # envs grows); None: no speed-up.
  multi_actor_train: "stream"
  # Only effective when multi_actor_mode == distinct.
  # seperate: while one actor is being trained, the other actors are not.
  actor_train_mode: "None"
  # seperate: there is a random_actor at policy time.
  actor_inference_mode: "None"
  # Only effective when actor_train_mode == seperate.
  actor_train_seperate_interval: 5000
  # Only effective when actor_train_mode == seperate.
  # False: a trained actor replaces the random actor (this parameter would
  # arguably be better named xxx_inference_xxx).
  actor_train_seperate_all_random: False
  wm_with_expert: False # Not used for now.
  wm_with_moe: False # Currently, when True, the experts come with it.
  # Counted in agent._step; with multiple tasks it means "after every task has
  # run that many steps". The corresponding expert is networks.Expert.
  moe_start_steps: 5e4
  # This and moe_start_steps actually correspond to two different MoE schemes;
  # for this one the expert is an RSSM (the corresponding expert_type is RSSM).
  moe_start_steps_ratio: 0.8
  # RSSM_append: shared knowledge base; the experts are the RSSMs of the other
  # groups, excluding this group.
  expert_type: "RSSM_append"
  ckpt_logdir: "None"

  # expert_only: the backbone is frozen while the experts are trained;
  # both: nothing is frozen.
  expert_train_mode: "both"
  expert_nums: -1 # -1 means the same as the number of environments.
  wm_use_router: False
  # Whether the experts and the MoE are trained separately. If True, the
  # expert parameters are frozen while the MoE is trained.
  multi_stage: False
  moe_heads_list: ["decoder","reward"] # alternative: ["decoder","reward","cont"]

  #  router_train_mode: "post"
  test: False
  train_env_name_list: "None"
  adapt_train_ratio: True

  encoder_with_moe: False
  head_with_moe: False

  # task_grouping
  task_grouping: False
  task_grouping_grad: "all" # choices: all/image/reward/action... (a loss name)
  group_num: 10
  resid_pdrop: 0.1
  expert_act: "gelu"

  resume: "None" # Newest: on first load, load the most recently modified ckpt.

  # [todo] end

  # Environment
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  grayscale: False
  prefill: 2500
  reward_EMA: True

  # Model
  dyn_hidden: 512 # alternatives noted by the author: 512, 256
  dyn_deter: 512 # alternatives noted by the author: 512, 256
  dyn_stoch: 32 # alternatives noted by the author: 32, 16
  dyn_discrete: 32 # alternatives noted by the author: 32, 16
  dyn_rec_depth: 1
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  grad_heads: ['decoder', 'reward', 'cont']
  units: 512
  act: 'SiLU'
  norm: True
  # Disabled encoder/decoder variant; it differs from the active one below
  # only in mlp_keys ('$^' instead of "^token_embed$").
#  encoder:
#    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, symlog_inputs: True}
#  decoder:
#    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
  # [todo]
  # NOTE(review): mlp_keys / cnn_keys look like regular expressions selecting
  # observation keys - confirm against the code that consumes them.
  encoder:
    { mlp_keys: "^token_embed$", cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, symlog_inputs: True }
  decoder:
    { mlp_keys: "^token_embed$", cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0 }

  # Per-network settings for the actor, the critic and the reward / cont heads.
  actor:
    {layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, std: 'learned', min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
  critic:
    {layers: 2, dist: 'symlog_disc', slow_target: True, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
  reward_head:
    {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  cont_head:
    {layers: 2, loss_scale: 1.0, outscale: 1.0}
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  initial: 'learned'

  # Training
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  dataset_size: 1000000
  opt: 'adam'

  # Behavior.
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  eval_state_mean: False

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: True
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: False

dmc_proprio:
  # Preset whose encoder/decoder use only the MLP key pattern (no image key).
  # Entries tagged [todo] carry the marker the author placed on them.
  # Disabled alternative: steps: 5e5
  steps: 1e5  # [todo]
  moe_start_steps: 25000  # [todo]
  action_repeat: 2
  expert_nums: 10
  group_num: 10
  # Disabled alternative: envs: 4
  envs: 1  # [todo]
  specific_num_states: 24
  specific_num_actions: 6
  eval_every: 5e3  # [todo]
  render_image: false  # [todo]
  train_ratio: 512
  video_pred_log: false
  # mlp_keys names exactly `token_embed` or `state`; cnn_keys is '$^', which
  # as a regex cannot match any non-empty name (presumably these are regexes
  # over observation keys - confirm in the consumer).
  # Disabled alternative for both: {mlp_keys: '.*', cnn_keys: '$^'}
  encoder:  # [todo]
    mlp_keys: '^(token_embed|state)$'
    cnn_keys: '$^'
  decoder:  # [todo]
    mlp_keys: '^(token_embed|state)$'
    cnn_keys: '$^'

dmc_vision:
  # Preset whose encoder/decoder take the `image` key on the CNN side.
  # Entries tagged [todo] carry the marker the author placed on them.
  # Disabled alternative: steps: 1e6
  steps: 2e5  # [todo]
  action_repeat: 2
  # Disabled alternative: envs: 4
  envs: 1  # [todo]
  moe_start_steps: 50000  # [todo]
  render_image: true  # [todo]
  specific_num_actions: 12  # [todo]
  expert_nums: 10
  group_num: 10
  eval_every: 5e3  # [todo]
  train_ratio: 512
  video_pred_log: true
  # mlp_keys names exactly `token_embed`; cnn_keys names `image`.
  encoder:  # [todo]
    mlp_keys: '^token_embed$'
    cnn_keys: 'image'
  decoder:  # [todo]
    mlp_keys: '^token_embed$'
    cnn_keys: 'image'

#[todo] start
metaworld_mt10:
  # Preset with action_repeat 1 and a 150-step time limit; encoder/decoder use
  # only the MLP key pattern.
  # Disabled alternative: steps: 1e6
  steps: 2e5
  action_repeat: 1
  # Disabled alternative: envs: 4
  envs: 1
  moe_start_steps: 1e5
  time_limit: 150
  expert_nums: 5
  group_num: 7
  eval_every: 5e3
  train_ratio: 512
  video_pred_log: false
  # mlp_keys names exactly `token_embed` or `state`; cnn_keys is '$^', which
  # as a regex cannot match any non-empty name.
  encoder:
    mlp_keys: '^(token_embed|state)$'
    cnn_keys: '$^'
  decoder:
    mlp_keys: '^(token_embed|state)$'
    cnn_keys: '$^'

metaworld_mt50:
  # Same keys as metaworld_mt10; only expert_nums and group_num differ (25/25).
  # Disabled alternative: steps: 1e6
  steps: 2e5
  action_repeat: 1
  # Disabled alternative: envs: 4
  envs: 1
  moe_start_steps: 1e5
  time_limit: 150
  expert_nums: 25
  group_num: 25
  eval_every: 5e3
  train_ratio: 512
  video_pred_log: false
  # mlp_keys names exactly `token_embed` or `state`; cnn_keys is '$^', which
  # as a regex cannot match any non-empty name.
  encoder:
    mlp_keys: '^(token_embed|state)$'
    cnn_keys: '$^'
  decoder:
    mlp_keys: '^(token_embed|state)$'
    cnn_keys: '$^'
#[todo] end

debug:
  # Turns the `debug` flag on and shrinks four values relative to `defaults`
  # (prefill 2500 -> 1, pretrain 100 -> 1, batch_size 16 -> 10,
  # batch_length 64 -> 20).
  debug: true
  prefill: 1
  pretrain: 1
  batch_size: 10
  batch_length: 20
