---
# NOTE(review): presumably the seed for the run's random number generators;
# confirm against the code that reads this file.
seed: 0

# Settings under `collect`.
collect:
  # Dotted name of the environment to collect from.
  env: "popgym.envs.MineSweeperEasy"
  popgym_env: true
  # Counts are written without `_` digit separators: only YAML 1.1 parsers
  # read `10_000` as an integer; YAML 1.2 parsers load it as a string.
  epochs: 10000
  random_epochs: 40000
  # NOTE(review): presumably the start and end values of an exploration
  # epsilon schedule; confirm against the consumer.
  eps_start: 0.1
  eps_end: 0.01
  propagate_state: true
  ratio: 1

# Settings under `train`.
train:
  # Written without `_` digit separators so that YAML 1.2 parsers also load
  # an integer (only YAML 1.1 parsers accept `10_000` as an int).
  batch_size: 10000
  gamma: 0.95
  target_delay: 200  # In epochs/updates
  lr: 0.0001
  adam_eps: 1.0e-8
  gradient_scale: 0.01
  warmup_epochs: 200
  weight_decay: 0.0
  train_ratio: 1

# Settings under `buffer`.
buffer:
  # Size in timesteps. Written without `_` digit separators so that YAML 1.2
  # parsers also load an integer.
  size: 5000000
  # NOTE(review): the original note here read "sqrt(batch_size) auto";
  # presumably "auto" selects sqrt(batch_size) — confirm against the consumer.
  swap_iters: "auto"

# Settings under `model`.
model:
  mlp_size: 256
  recurrent_size: 256
  dropout: 0.0
  ensemble_size: 10
  ensemble_subset: 5
  memory_name: 'NSFFM'
  # NOTE(review): presumably the arguments for the memory module selected by
  # `memory_name`; confirm against the consumer.
  memory:
    input_size: 256
    trace_size: 16
    context_size: 16
    num_blocks: 2

# Settings under `eval`.
eval:
  interval: 10
  episodes: 20
  # Collection overrides used during evaluation.
  collect:
    # Matches the top-level `collect.env`. This previously named
    # "popgym.envs.PositionOnlyCartPoleEasy", a different task from the one
    # being trained on.
    # NOTE(review): confirm cross-environment evaluation was not intended.
    env: "popgym.envs.MineSweeperEasy"
    popgym_env: true
    random_epochs: 0
    eps_start: 0.0
    eps_end: 0.0
    propagate_state: true
