# Seed value (presumably for random number generation — confirm in the
# code that loads this file).
seed: 0

# Settings under `collect`.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that consumes this file.
collect:
  # Alternative environment id without the "-ram" suffix, kept for quick
  # switching:
  # env: "ALE/Breakout-v5"
  env: "ALE/Breakout-ram-v5"
  # Environment-family toggles; only the Atari one is enabled here.
  popgym_env: false
  atari_env: true
  env_prev_action: false
  # Integers are written without `_` digit separators: YAML 1.1 parsers
  # accept `30_000` as an integer, but YAML 1.2 parsers read it as a string.
  epochs: 30000  # 30 thousand
  random_epochs: 1000
  # Presumably the start/end values of an epsilon (exploration) schedule —
  # confirm.
  eps_start: 0.2
  eps_end: 0.01
  propagate_state: true
  ratio: 1

# Settings under `train`.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that consumes this file.
train:
  # Written without `_` digit separators so YAML 1.2 parsers also read an
  # integer (they treat `2_000` as a string).
  batch_size: 2000
  # A sequence, holding a single entry here.
  gamma:
    - 0.99
  target_delay: 400  # In epochs/updates
  # Start and end values are identical here.
  lr_start: 0.0001
  lr_end: 0.0001
  # Keep the decimal point and the signed exponent: YAML 1.1 parsers such as
  # PyYAML resolve forms like `1e-8` as a string rather than a float.
  adam_eps: 1.0e-8
  gradient_scale: 1.0
  warmup_epochs: 10
  weight_decay: 0.0
  train_ratio: 40

# Settings under `buffer`.
buffer:
  # Written without `_` digit separators so YAML 1.2 parsers also read an
  # integer (they treat `20_000_000` as a string).
  size: 20000000  # In timesteps (20 million)
  # NOTE(review): "auto" presumably resolves to sqrt(batch_size) — confirm
  # in the buffer code.
  swap_iters: "auto"

# Settings under `model`.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that consumes this file.
model:
  mlp_size: 512
  recurrent_size: 256
  dropout: 0.0
  ensemble_size: 1
  ensemble_subset: 1
  memory_name: "StackedLRU"
  atari_cnn: false
  # Nested settings; presumably passed to the module selected by
  # `memory_name` — confirm.
  memory:
    # Same value as `mlp_size` above; presumably must stay in sync — confirm.
    input_size: 512
    # Same value as `recurrent_size` above; presumably must stay in sync —
    # confirm.
    d_model: 256
    d_hidden: 256
    n_layers: 2

# Settings under `eval`.
eval:
  # NOTE(review): unit of `interval` is not stated here (epochs?) — confirm.
  interval: 100
  episodes: 10
  # These keys reuse names from the top-level `collect` section; presumably
  # they override those values during evaluation — confirm against the
  # loader. Both epsilon values are zero and no random epochs are requested.
  collect:
    random_epochs: 0
    eps_start: 0.0
    eps_end: 0.0
    propagate_state: true
