# Experiment-tracking settings.
wandb_config:
  # NOTE(review): "Reccurent" looks like a typo for "Recurrent", but this
  # string is consumed at runtime and renaming it would presumably send runs
  # to a different project -- left unchanged on purpose.
  project_name: 'Reccurent-Baselines-RATE-ViZDoom-Two-Colors'
  # Presumably toggles Weights & Biases logging -- confirm against the trainer.
  wwandb: true
  # Presumably toggles Comet logging -- confirm against the trainer.
  wcomet: false

# Data-related settings.
data_config:
  # NOTE(review): presumably the return discount factor -- confirm.
  gamma: 1.0
  # Normalization mode:
  #   0 - no normalization
  #   1 - divide by 255
  #   2 - standard scaling
  normalize: 1

# Optimizer, schedule and training-loop settings.
# NOTE(review): bare values in trailing comments (e.g. `# 1e-3`) look like
# previously tried settings left by the author -- confirm before relying on
# them.
training_config:
  learning_rate: 0.0003  # 1e-3
  lr_end_factor: 0.1  # old: 0.001
  # Presumably the optimizer's beta coefficients -- confirm against the trainer.
  beta_1: 0.9
  beta_2: 0.95
  weight_decay: 0.1
  batch_size: 64  # 128
  warmup_steps: 10000  # 100
  final_tokens: 10000000
  # NOTE(review): null presumably disables gradient-norm clipping -- confirm.
  grad_norm_clip: null  # 1.0
  epochs: 100  # 100
  ckpt_epoch: 10  # 2
  use_erl_stop: false
  # Inference during training.
  online_inference: true
  # WARNING: context_length and sections are interpreted per model family.
  #   * RATE/GRATE: `sections` segments, each with context `context_length`.
  #   * DT: a single segment whose effective context is
  #     sections * context_length.
  # Example: context_length=30 with sections=3 trains DT with an effective
  # context of 90, or RATE with 3 segments of context 30.
  context_length: 30  # RATE/GRATE: L = L; DT: L = sections * L
  sections: 3  # RATE/GRATE: S = S; DT: S = 1


# Model architecture settings.
model_config:
  mode: 'doom'
  n_layer: 3  # 8
  d_model: 128  # 256
  model_type: 'reward_conditioned'
  # Values listed by the author: 'mamba', 'mamba-min'.
  token_mixer: 'mamba'

# Settings for online inference rollouts.
# NOTE(review): presumably only read when training_config.online_inference is
# enabled -- confirm against the trainer.
online_inference_config:
  use_argmax: false  # False
  episode_timeout: 4200  # 2100
  # NOTE(review): presumably the target return the model is conditioned on
  # (model_type is 'reward_conditioned') -- confirm.
  desired_return_1: 56.5  # 56.5
  # desired_return_2: 56.5  # 56.5