# Baseline values for every option. The other top-level keys in this file are
# named presets that reuse these key names (or regex patterns over them);
# presumably the loader layers a chosen preset on top of `defaults` — confirm.
defaults:

  seed: 0
  method: name
  # Looks like `<suite>_<task>` judging by the preset values further down
  # (e.g. `safetygym_SafetyPointGoal1-v0`) — TODO confirm against the env factory.
  task: dummy_disc
  logdir: ./logdir_fosp/
  replay: uniform
  # NOTE(review): `1e6` has no decimal point. YAML 1.2 loaders resolve it to a
  # float, but YAML 1.1 loaders such as PyYAML keep it as a string — verify
  # which loader reads this file.
  replay_size: 1e6
  replay_online: False
  eval_dir: ''
  # Presumably a regular expression; as one, `.*` matches any string.
  filter: '.*'

  # Options grouped under `jax`. Their semantics are defined by the consuming
  # code, which is not part of this file.
  jax:
    platform: gpu
    jit: True
    precision: float16
    prealloc: True
    debug_nans: False
    logical_cpus: 0
    logical_gpus: 0
    debug: False
    # Both device lists contain only index 0.
    policy_devices: [0]
    train_devices: [0]
    metrics_every: 10

  # Options grouped under `run`. Presets override these either with a nested
  # `run:` mapping or with a dotted key such as `run.train_ratio`.
  run:
    script: train
    # NOTE(review): exponent literals without a decimal point (`1e10`, `1e6`)
    # are floats under YAML 1.2 but strings under YAML 1.1 loaders — confirm.
    steps: 1e10
    expl_until: 0
    log_every: 300
    save_every: 900
    eval_every: 1e6
    eval_initial: True
    eval_eps: 1
    eval_samples: 1
    train_ratio: 32.0
    train_fill: 0
    eval_fill: 0
    log_zeros: False
    log_keys_video: [image, image2]
    # Presumably a regular expression; as one, `^$` matches only the empty string.
    log_keys_sum: '^$'
    # `log_keys_mean` and `log_keys_max` hold the same `|`-separated list of
    # key names; keep them in sync when adding a metric to both.
    log_keys_mean: 'log_entropy|log_plan_action_mean|log_plan_action_std|log_plan_num_safe_traj|log_plan_ret|log_plan_cost|log_plan_penalty|log_plan_lagrange_multiplier|log_plan_penalty_multiplier|log_lagrange_penalty|log_lagrange_p|log_lagrange_i|log_lagrange_d'
    log_keys_max: 'log_entropy|log_plan_action_mean|log_plan_action_std|log_plan_num_safe_traj|log_plan_ret|log_plan_cost|log_plan_penalty|log_plan_lagrange_multiplier|log_plan_penalty_multiplier|log_lagrange_penalty|log_lagrange_p|log_lagrange_i|log_lagrange_d'
    from_checkpoint: ''
    online_training: False
    sync_every: 10
    # Disabled TCP alternative to the `ipc://` address used below.
    # actor_addr: 'tcp://127.0.0.1:5551'
    actor_addr: 'ipc:///tmp/5551'
    actor_batch: 32

  # `envs` and `wrapper` share the keys length, reset, discretize and checks;
  # `envs` additionally carries amount, parallel and restart.
  envs: {amount: 1, parallel: process, length: 0, reset: True, restart: True, discretize: 0, checks: False}
  wrapper: {length: 0, reset: True, discretize: 0, checks: False}
  # One option mapping per environment suite. The suite names used by the
  # presets' `task` values (`safetygym_...`, `safetygymmujoco_...`) appear here
  # as keys — presumably the text before the first underscore in `task`
  # selects the entry; confirm in the environment factory.
  env:
    atari: {size: [64, 64], repeat: 4, sticky: True, gray: False, actions: all, lives: unused, noops: 0, resize: opencv}
    dmlab: {size: [64, 64], repeat: 4, episodic: True}
    minecraft: {size: [64, 64], break_speed: 100.0}
    dmc: {size: [64, 64], repeat: 2, camera: -1}
    loconav: {size: [64, 64], repeat: 2, camera: -1}
    metadrive: {size: [64,64], repeat: 4, render: False, obs_key: 'image'}
    carracing: {size: [64,64], repeat: 2, render: False, obs_key: 'image'}
    # The three safetygym* entries default to `render: False` with
    # `obs_key: 'observation'`; image-based presets below switch both.
    safetygym: {size: [64,64], repeat: 1, render: False, obs_key: 'observation', camera_name: 'fixedfar'}
    safetygymcoor: {size: [64,64], repeat: 5, render: False, obs_key: 'observation', camera_name: 'fixedfar'}
    safetygymmujoco: {size: [64,64], repeat: 1, render: False, obs_key: 'observation'}
  # Agent
  # `expl_behavior: None` is the plain scalar `None`, which YAML loaders read
  # as the string "None" rather than null (null is spelled `null` or `~`).
  task_behavior: Greedy
  expl_behavior: None
  batch_size: 16
  batch_length: 64
  data_loaders: 8

  # World Model
  # `grad_heads` names decoder, reward, cont and cost; each has a matching
  # `decoder` / `*_head` mapping below.
  grad_heads: [decoder, reward, cont, cost]
  rssm: {deter: 4096, units: 1024, stoch: 32, classes: 32, act: silu, norm: layer, initial: learned, unimix: 0.01, unroll: False, action_clip: 1.0, winit: normal, fan: avg}
  # `mlp_keys` / `cnn_keys` are presumably regular expressions over
  # observation key names; presets narrow them (e.g. `image|image2`) — confirm.
  encoder: {mlp_keys: '.*', cnn_keys: '.*', act: silu, norm: layer, mlp_layers: 5, mlp_units: 1024, cnn: resnet, cnn_depth: 96, cnn_blocks: 0, resize: stride, winit: normal, fan: avg, symlog_inputs: True, minres: 4}
  decoder: {mlp_keys: '.*', cnn_keys: '.*', act: silu, norm: layer, mlp_layers: 5, mlp_units: 1024, cnn: resnet, cnn_depth: 96, cnn_blocks: 0, image_dist: mse, vector_dist: mse, inputs: [deter, stoch], resize: stride, winit: normal, fan: avg, outscale: 1.0, minres: 4, cnn_sigmoid: False}
  # `reward_head` and `cost_head` are identical except for `outscale`
  # (0.0 vs 1.0). `cont_head` uses `dist: binary` and has no `bins` entry.
  reward_head: {layers: 5, units: 1024, act: silu, norm: layer, dist: symlog_disc, outscale: 0.0, outnorm: False, inputs: [deter, stoch], winit: normal, fan: avg, bins: 255}
  cost_head: {layers: 5, units: 1024, act: silu, norm: layer, dist: symlog_disc, outscale: 1.0, outnorm: False, inputs: [deter, stoch], winit: normal, fan: avg, bins: 255}
  cont_head: {layers: 5, units: 1024, act: silu, norm: layer, dist: binary, outscale: 1.0, outnorm: False, inputs: [deter, stoch], winit: normal, fan: avg}
  loss_scales: {image: 1.0, vector: 1.0, reward: 1.0, cost: 1.0, cont: 1.0, dyn: 0.5, rep: 0.1, actor: 1.0, critic: 1.0, slowreg: 1.0}
  # `dyn_loss` and `rep_loss` share the same settings here; their differing
  # weights live in `loss_scales` (dyn: 0.5, rep: 0.1).
  dyn_loss: {impl: kl, free: 1.0}
  rep_loss: {impl: kl, free: 1.0}
  model_opt: {opt: adam, lr: 1e-4, eps: 1e-8, clip: 1000.0, wd: 0.0, warmup: 0, lateclip: 0.0}

  # Actor Critic
  actor: {layers: 5, units: 1024, act: silu, norm: layer, minstd: 0.1, maxstd: 1.0, outscale: 1.0, outnorm: False, unimix: 0.01, inputs: [deter, stoch], winit: normal, fan: avg, symlog_inputs: False}
  # `critic` and `value` differ only in `inputs`: `critic` additionally lists
  # `action`.
  critic: {layers: 5, units: 1024, act: silu, norm: layer, dist: symlog_disc, outscale: 0.0, outnorm: False, inputs: [deter, stoch, action], winit: normal, fan: avg, bins: 255, symlog_inputs: False}
  value: {layers: 5, units: 1024, act: silu, norm: layer, dist: symlog_disc, outscale: 0.0, outnorm: False, inputs: [deter, stoch], winit: normal, fan: avg, bins: 255, symlog_inputs: False}
  # The four optimizer mappings below are identical to one another.
  actor_opt: {opt: adam, lr: 3e-5, eps: 1e-5, clip: 100.0, wd: 0.0, warmup: 0, lateclip: 0.0}
  critic_opt: {opt: adam, lr: 3e-5, eps: 1e-5, clip: 100.0, wd: 0.0, warmup: 0, lateclip: 0.0}
  value_opt: {opt: adam, lr: 3e-5, eps: 1e-5, clip: 100.0, wd: 0.0, warmup: 0, lateclip: 0.0}
  cost_critic_opt: {opt: adam, lr: 3e-5, eps: 1e-5, clip: 100.0, wd: 0.0, warmup: 0, lateclip: 0.0}

  # Separate distribution / gradient settings are given for the `_disc` and
  # `_cont` suffixes — presumably discrete vs continuous action spaces; confirm.
  actor_dist_disc: onehot
  actor_dist_cont: normal
  actor_grad_disc: reinforce
  actor_grad_cont: backprop
  critic_type: vfunction
  # `imag_horizon` (15) equals `planner.horizon` below; `horizon` (333) is a
  # separate key.
  imag_horizon: 15
  imag_unroll: False
  horizon: 333
  # NOTE(review): this key is spelled `lagrange_multiplier_init` (1e-6), while
  # `pid` below carries its own `lagrangian_multiplier_init` (0.001). They are
  # distinct keys with different values — verify which one each consumer reads.
  penalty_multiplier_init: 5e-9
  lagrange_multiplier_init: 1e-6
  use_cost: True
  use_pex: False
  pessimistic: False
  cost_weight: 10.0
  cost_limit: 25.0
  use_cost_model: True
  return_lambda: 0.95
  critic_slowreg: logprob
  slow_critic_update: 1
  slow_critic_fraction: 0.02
  # `retnorm` and `costnorm` use identical settings.
  retnorm: {impl: perc_ema, decay: 0.99, max: 1.0, perclo: 5.0, perchi: 95.0}
  costnorm: {impl: perc_ema, decay: 0.99, max: 1.0, perclo: 5.0, perchi: 95.0}
  planner: {horizon: 15, num_samples: 500, mixture_coef: 0.05, num_elites: 50, temperature: 10.0, iterations: 6, momentum: 0.1, init_std: 1.0}

  # Only `ki` is non-zero among kp / ki / kd in this default.
  pid: {kp: 0.0, ki: 0.1, kd: 0.0, init_penalty: 0.0, d_delay: 10, delta_p_ema_alpha: 0.95, delta_d_ema_alpha: 0.95, sum_norm: True, diff_norm: False, penalty_max: 100.0, lagrangian_multiplier_init: 0.001,  use_cost_decay: False, init_cost_limit: 10.0, decay_time_step: 20000, decay_num: 7, decay_limit_step: 2.0}
  actent: 3e-4

  # Exploration
  expl_rewards: {extr: 1.0, disag: 0.1}
  # Unlike the other `*_opt` mappings in `defaults`, `expl_opt` has no
  # `lateclip` entry.
  expl_opt: {opt: adam, lr: 1e-4, eps: 1e-5, clip: 100.0, wd: 0.0, warmup: 0}
  disag_head: {layers: 5, units: 1024, act: silu, norm: layer, dist: mse, outscale: 1.0, inputs: [deter, stoch, action], winit: normal, fan: avg}
  disag_target: [stoch]
  disag_models: 8
  # NOTE(review): `cal_weight` is an integer (5) while other weights in this
  # file are floats — confirm the consumer does not depend on the type.
  cql_n_actions: 10
  cql_temp: 1.0
  cal_weight: 5

# Preset `fosp`: image-based SafetyPointGoal1 with `pessimistic: True` and a
# cost limit of 0.0. Its entries are currently identical to the `safedreamer`
# preset.
fosp:
  task_behavior: Greedy
  expl_behavior: None
  use_cost: True
  cost_weight: 10.0
  cost_limit: 0.0
  task: safetygym_SafetyPointGoal1-v0
  envs.amount: 1
  env.safetygym: {repeat: 4,render: True, obs_key: 'image', camera_name: 'vision_front_back'}
  # `run.train_ratio` is given as a dotted key while the remaining `run`
  # options use a nested mapping; the two address different `run` entries.
  run.train_ratio: 512
  pessimistic: True
  run:
    script: train_eval
    steps: 1e7
    eval_every: 1e4
    eval_initial: False
    eval_eps: 1
  rssm.deter: 512
  # Keys containing regex syntax are presumably matched by the loader against
  # dotted key paths — confirm. As regexes, `\.units` / `\.layers` cannot
  # match `...mlp_units` / `...mlp_layers`, which have their own patterns; the
  # mlp_* values set here (1024, 5) equal those in `defaults`.
  .*\.cnn_depth: 32
  .*\.layers: 2
  .*\.units: 512
  .*\.mlp_units: 1024
  .*\.mlp_layers: 5
  rssm: {stoch: 48, classes: 48}
  encoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}
  decoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}

# Preset `safedreamer`: image-based SafetyPointGoal1 with `pessimistic: True`
# and a cost limit of 0.0. Its entries are currently identical to the `fosp`
# preset.
safedreamer:
  task_behavior: Greedy
  expl_behavior: None
  use_cost: True
  cost_weight: 10.0
  cost_limit: 0.0
  task: safetygym_SafetyPointGoal1-v0
  envs.amount: 1
  env.safetygym: {repeat: 4,render: True, obs_key: 'image', camera_name: 'vision_front_back'}
  # `run.train_ratio` is given as a dotted key while the remaining `run`
  # options use a nested mapping; the two address different `run` entries.
  run.train_ratio: 512
  pessimistic: True
  run:
    script: train_eval
    steps: 1e7
    eval_every: 1e4
    eval_initial: False
    eval_eps: 1
  rssm.deter: 512
  # Keys containing regex syntax are presumably matched by the loader against
  # dotted key paths — confirm. As regexes, `\.units` / `\.layers` cannot
  # match `...mlp_units` / `...mlp_layers`, which have their own patterns; the
  # mlp_* values set here (1024, 5) equal those in `defaults`.
  .*\.cnn_depth: 32
  .*\.layers: 2
  .*\.units: 512
  .*\.mlp_units: 1024
  .*\.mlp_layers: 5
  rssm: {stoch: 48, classes: 48}
  encoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}
  decoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}

# Preset `osrp`: the only preset in this file that sets `expl_behavior` to
# something other than `None` (`CCEPlanner`) and that overrides `planner`.
osrp:
  task_behavior: Greedy
  expl_behavior: CCEPlanner
  use_cost: True
  cost_weight: 10.0
  cost_limit: 2.0
  # Disabled alternative task (a `safetygymmujoco` suite id).
  # task: safetygymmujoco_SafetyHalfCheetahVelocity-v1
  task: safetygym_SafetyPointGoal1-v0
  envs.amount: 1
  # Candidate `camera_name` values noted by the original author:
  # vision, track, fixednear, fixedfar, vision_front_back
  env.safetygym: {size: [64,64], repeat: 5, render: True, obs_key: 'image', camera_name: 'vision_front_back'}
  # Disabled `cost_head` override that would switch `dist` to `binary`.
  #cost_head: {layers: 5, units: 1024, act: silu, norm: layer, dist: binary, outscale: 1.0, outnorm: False, inputs: [deter, stoch], winit: normal, fan: avg}
  run.train_ratio: 512
  # Lists only five of the eight `planner` keys found in `defaults`
  # (temperature, iterations and momentum are omitted); relative to
  # `defaults` only `mixture_coef` changes (0.05 -> 0.00).
  planner: {horizon: 15, num_samples: 500, num_elites: 50, mixture_coef: 0.00, init_std: 1.0}
  run:
    script: train_eval
    steps: 1e7
    eval_every: 1e4
    eval_initial: False
    eval_eps: 1
  rssm.deter: 512
  # Keys containing regex syntax are presumably matched by the loader against
  # dotted key paths — confirm. The mlp_* values set here (1024, 5) equal
  # those in `defaults`.
  .*\.cnn_depth: 32
  .*\.layers: 2
  .*\.units: 512
  .*\.mlp_units: 1024
  .*\.mlp_layers: 5
  rssm: {stoch: 48, classes: 48}
  encoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}
  decoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}

# Preset `dreamerv3`: image-based run with `use_cost: False` on
# SafetyPointGoal0.
dreamerv3:
  task_behavior: Greedy
  expl_behavior: None
  use_cost: False
  task: safetygym_SafetyPointGoal0-v0
  envs.amount: 1
  # Candidate `camera_name` values noted by the original author:
  # vision, track, fixednear, fixedfar, vision_front_back
  # Both `env.safetygymmujoco` and `env.safetygym` are overridden here even
  # though `task` above uses the `safetygym_` prefix.
  env.safetygymmujoco: {repeat: 1,render: True, obs_key: 'image'}
  env.safetygym: {size: [64,64], repeat: 4, render: True, obs_key: 'image', camera_name: 'vision_front_back'}
  run.train_ratio: 512
  run:
    script: train_eval
    steps: 1e7
    eval_every: 1e4
    eval_initial: False
    eval_eps: 1
  rssm.deter: 512
  # Keys containing regex syntax are presumably matched by the loader against
  # dotted key paths — confirm. The mlp_* values set here (1024, 5) equal
  # those in `defaults`.
  .*\.cnn_depth: 32
  .*\.layers: 2
  .*\.units: 512
  .*\.mlp_units: 1024
  .*\.mlp_layers: 5
  rssm: {stoch: 48, classes: 48}

  encoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}
  decoder: {mlp_keys: '.*', cnn_keys: 'image|image2'}

# Preset `dreamerv3_vector`: vector-observation run with `use_cost: False`.
# Observations come from the `observation` key and rendering is off.
dreamerv3_vector:
  task_behavior: Greedy
  expl_behavior: None
  use_cost: False
  # Fixed task id: was `SafetHumanoidVelocity-v1` (missing the `y` in
  # `Safety`); every other task id in this file uses the `Safety...` prefix.
  task: safetygymmujoco_SafetyHumanoidVelocity-v1
  # Disabled alternative task (a `safetygymcoor` suite id).
  # task: safetygymcoor_SafetyPointGoal1-v0
  envs.amount: 1
  # Candidate `camera_name` values noted by the original author:
  # vision, track, fixednear, fixedfar, vision_front_back
  env.safetygymmujoco: {repeat: 1,render: False, obs_key: 'observation'}
  env.safetygymcoor: {size: [64,64], repeat: 5, render: False, obs_key: 'observation', camera_name: 'fixedfar'}
  # Only the `vector` entry of `loss_scales` is listed (defaults has 1.0).
  loss_scales: {vector: 5.0}
  run.train_ratio: 512
  run:
    script: train_eval
    steps: 1e7
    eval_every: 1e4
    eval_initial: False
    eval_eps: 1
  rssm.deter: 512
  # Keys containing regex syntax are presumably matched by the loader against
  # dotted key paths — confirm. The mlp_* values set here (1024, 5) equal
  # those in `defaults`.
  .*\.cnn_depth: 32
  .*\.layers: 2
  .*\.units: 512
  .*\.mlp_units: 1024
  .*\.mlp_layers: 5
  # Same stoch / classes values as `defaults` (32 / 32).
  rssm: {stoch: 32, classes: 32}
  # As a regex, `$^` can only match the empty string, so no non-empty key
  # name matches `cnn_keys` — presumably this disables the CNN path; confirm.
  encoder: {mlp_keys: 'observation', cnn_keys: '$^'}
  decoder: {mlp_keys: 'observation', cnn_keys: '$^'}


# Size preset `small`: same deter / cnn_depth / units / layers values that the
# task presets above set inline (512 / 32 / 512 / 2).
small:
  rssm.deter: 512
  # Regex-style keys, presumably matched by the loader against dotted key
  # paths — confirm.
  .*\.cnn_depth: 32
  .*\.units: 512
  .*\.layers: 2

# Size preset `medium`: deter 1024, cnn_depth 48, units 640, layers 3.
medium:
  rssm.deter: 1024
  # Regex-style keys, presumably matched by the loader against dotted key
  # paths — confirm.
  .*\.cnn_depth: 48
  .*\.units: 640
  .*\.layers: 3

# Size preset `large`: deter 2048, cnn_depth 64, units 768, layers 4.
large:
  rssm.deter: 2048
  # Regex-style keys, presumably matched by the loader against dotted key
  # paths — confirm.
  .*\.cnn_depth: 64
  .*\.units: 768
  .*\.layers: 4

# Size preset `xlarge`: these four values equal the ones already written in
# `defaults` (deter 4096, cnn_depth 96, units 1024, layers 5).
xlarge:
  rssm.deter: 4096
  # Regex-style keys, presumably matched by the loader against dotted key
  # paths — confirm.
  .*\.cnn_depth: 96
  .*\.units: 1024
  .*\.layers: 5


# Preset `debug`: switches the platform to cpu, turns on `jax.debug` and
# wrapper checks, and shrinks batch, replay and network sizes.
debug:

  # `jit` stays True here, as in `defaults`.
  jax: {jit: True, prealloc: False, debug: True, platform: cpu}
  envs: {restart: False, amount: 1}
  wrapper: {length: 100, checks: True}
  run:
    eval_every: 1000
    log_every: 5
    save_every: 10
    train_ratio: 32
    actor_batch: 2
  batch_size: 8
  batch_length: 12
  replay_size: 1e5
  encoder.cnn_depth: 8
  decoder.cnn_depth: 8
  rssm: {deter: 32, units: 16, stoch: 4, classes: 4}
  # Regex-style keys, presumably matched by the loader against dotted key
  # paths — confirm. Every `unroll` and `wd` entry in `defaults` already has
  # the value set here (False / 0.0).
  .*unroll: False
  .*\.layers: 2
  .*\.units: 16
  .*\.wd$: 0.0
