# Top-level agent hyperparameters.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that consumes this config.
# Floats in scientific notation are written with an explicit mantissa dot
# (3.0e-4 rather than 3e-4) so that YAML 1.1 loaders such as plain PyYAML
# parse them as numbers instead of strings.
act_entropy: 3.0e-4
kl_free: 1.0
# Two distinct horizons are configured. Presumably `imag_horizon` is a
# rollout length in steps and `horizon` a discounting horizon - TODO confirm.
imag_horizon: 15
horizon: 333
lamb: 0.95
compile: true
log_grads: false
# `${...}` is an interpolation resolved by the config loader; the referenced
# `device` node is expected to come from the composed (outer) config.
device: ${device}

# Optimizer-style settings (grouping inferred from key names - confirm).
# Scientific-notation values carry an explicit mantissa dot so that YAML 1.1
# loaders (e.g. plain PyYAML) read them as floats rather than strings.
lr: 4.0e-5
# NOTE(review): `agc` and `pmin` look like gradient-clipping parameters;
# verify against the optimizer implementation.
agc: 0.3
pmin: 1.0e-3
eps: 1.0e-20
beta1: 0.9
beta2: 0.999
# Presumably counted in optimizer steps - TODO confirm the unit.
warmup: 1000
# Named scalar weights, one per loss term.
# NOTE(review): presumably each value multiplies the loss of the same name,
# and only the terms relevant to the selected `rep_loss` are used - confirm
# against the training code. Key order is left untouched in case the
# consumer iterates over this mapping.
loss_scales:
  barlow: 0.05
  infonce: 1.0
  recon: 1.0
  rew: 1.0
  con: 1.0
  dyn: 1.0
  rep: 0.1
  policy: 1.0
  value: 1.0
  repval: 0.3
  swav: 1.0
  temp: 1.0
  norm: 1.0
# Selects the representation loss variant.
# Options listed by the original author: r2dreamer / dreamer / infonce /
# dreamerpro.
# NOTE(review): the option is spelled `dreamerpro` here while the matching
# settings in this file live under the `dreamer_pro` key - confirm the exact
# string the code expects.
rep_loss: 'r2dreamer'

# Settings grouped under the `r2dreamer` key; presumably read when
# `rep_loss` is 'r2dreamer' - confirm.
r2dreamer:
  # Written with a mantissa dot so YAML 1.1 loaders parse it as a float.
  lambd: 5.0e-4

# Settings grouped under the `dreamer_pro` key; presumably read when the
# DreamerPro-style representation loss is selected - confirm.
dreamer_pro:
  # NOTE(review): spelled `warm_up` here, distinct from the top-level
  # `warmup` key; verify both are consumed where intended.
  warm_up: 1
  num_prototypes: 2500
  proto_dim: 32
  temperature: 0.1
  sinkhorn_eps: 0.05
  sinkhorn_iters: 3
  ema_update_every: 1
  ema_update_fraction: 0.05
  freeze_prototypes_iters: 10000
  # Augmentation options; booleans use canonical lowercase YAML literals.
  aug:
    max_delta: 3.0
    same_across_time: true
    bilinear: true

# World-model configuration: rssm, encoder, decoder, reward_head, cont_head.
world_model:
  # Recurrent state-space model settings.
  # `stoch`, the layer counts, `blocks`, `unimix_ratio` and `initial` are
  # literals here; `deter`, `hidden`, `discrete`, `act` and `norm` are
  # interpolated from the `model.*` config node, which is not defined in
  # this file.
  rssm:
    stoch: 32
    deter: ${model.deter}
    hidden: ${model.hidden}
    discrete: ${model.discrete}
    img_layers: 2
    obs_layers: 1
    dyn_layers: 1
    blocks: 8
    act: ${model.act}
    norm: ${model.norm}
    unimix_ratio: 0.01
    # NOTE(review): other accepted values for `initial` are not visible
    # here - see the RSSM implementation.
    initial: 'learned'
    device: ${device}
  # Encoder settings, with one sub-section per input branch (mlp / cnn).
  encoder:
    # Key lists are interpolated from `env.encoder.*`, defined outside this
    # file.
    mlp_keys: ${env.encoder.mlp_keys}
    cnn_keys: ${env.encoder.cnn_keys}
    mlp:
      # NOTE(review): `shape` and `outscale` are explicitly null; how the
      # consumer treats null is not visible here - confirm.
      shape: null
      layers: 3
      units: ${model.units}
      act: ${model.act}
      norm: ${model.norm}
      device: ${device}
      outscale: null
      # Canonical lowercase boolean. This is the only MLP block in the file
      # with symlog_inputs enabled.
      symlog_inputs: true
      name: 'mlp_encoder'
    cnn:
      act: ${model.act}
      norm: ${model.norm}
      # kernel_size, minres and mults carry the same values as
      # `decoder.cnn` in this file.
      kernel_size: 5
      minres: 4
      depth: ${model.depth}
      mults: [2, 3, 4, 4]

  # Decoder settings, with one sub-section per output branch (mlp / cnn).
  decoder:
    # Key lists are interpolated from `env.decoder.*`, defined outside this
    # file.
    mlp_keys: ${env.decoder.mlp_keys}
    cnn_keys: ${env.decoder.cnn_keys}
    # Named output distributions for each branch.
    mlp_dist:
      name: 'symlog_mse'
    cnn_dist:
      name: 'mse'
    mlp:
      # NOTE(review): `shape` is explicitly null; presumably filled in by
      # the consumer at build time - confirm.
      shape: null
      layers: 3
      units: ${model.units}
      act: ${model.act}
      norm: ${model.norm}
      dist:
        name: 'identity'
      device: ${device}
      outscale: 1.0
      # Canonical lowercase boolean.
      symlog_inputs: false
      name: 'mlp_decoder'
    cnn:
      depth: ${model.depth}
      units: ${model.units}
      bspace: 8
      # mults, kernel_size and minres carry the same values as
      # `encoder.cnn` in this file.
      mults: [2, 3, 4, 4]
      act: ${model.act}
      norm: ${model.norm}
      kernel_size: 5
      minres: 4
      outscale: 1.0
  # Reward prediction head.
  reward_head:
    # Single-element list; its value equals `dist.bin_num` below (255).
    shape: [255]
    layers: 1
    units: ${model.units}
    act: ${model.act}
    norm: ${model.norm}
    dist:
      name: 'symexp_twohot'
      bin_num: 255
    # NOTE(review): outscale is 0.0 here; presumably it scales the output
    # layer initialisation - confirm.
    outscale: 0.0
    device: ${device}
    # Canonical lowercase boolean.
    symlog_inputs: false
    name: 'reward'
  # Head named 'cont' with a 'binary' distribution and one output.
  # NOTE(review): presumably predicts the episode-continuation flag -
  # confirm.
  cont_head:
    shape: [1]
    layers: 1
    units: ${model.units}
    act: ${model.act}
    norm: ${model.norm}
    dist:
      name: 'binary'
    outscale: 1.0
    device: ${device}
    # Canonical lowercase boolean.
    symlog_inputs: false
    name: 'cont'

# Behaviour configuration: slow-target settings plus actor and critic.
behavior:
  # NOTE(review): presumably the target network is updated every
  # `slow_target_update` steps, mixing in `slow_target_fraction` of the
  # online weights - confirm.
  slow_target_update: 1
  slow_target_fraction: 0.02
  actor:
    # NOTE(review): `shape` is explicitly null; presumably derived from the
    # action space at build time - confirm.
    shape: null
    layers: 3
    units: ${model.units}
    act: ${model.act}
    norm: ${model.norm}
    device: ${device}
    # Three alternative distribution configs, keyed `cont`, `disc` and
    # `multi_disc`; presumably selected by action-space type - confirm.
    dist:
      cont:
        name: 'bounded_normal'
        min_std: 0.1
        max_std: 1.0
      disc:
        name: 'onehot'
        unimix_ratio: 0.01
      multi_disc:
        name: 'multi_onehot'
        unimix_ratio: 0.01
    outscale: 0.01
    # Canonical lowercase boolean.
    symlog_inputs: false
    name: 'actor'
  # Critic head (named 'value'). It uses the same distribution settings as
  # `world_model.reward_head` in this file: 'symexp_twohot' with 255 bins
  # and outscale 0.0.
  critic:
    # Single-element list; its value equals `dist.bin_num` below (255).
    shape: [255]
    layers: 3
    units: ${model.units}
    act: ${model.act}
    norm: ${model.norm}
    device: ${device}
    dist:
      name: 'symexp_twohot'
      bin_num: 255
    outscale: 0.0
    # Canonical lowercase boolean.
    symlog_inputs: false
    name: 'value'
