# Environment.
# Environment name. It is also reused as the Hydra name further down
# (hydra.name: ${env}).
env: Breakout
# see section 4.1 in https://arxiv.org/pdf/1812.06110.pdf
# NOTE(review): presumably ends an episode whenever a life is lost - confirm
# against the environment wrapper that reads this flag.
terminal_on_life_loss: true
# Training schedule.
# Total step budget. Two other settings follow this value via interpolation:
# replay_buffer_capacity (below) and agent.params.prioritized_replay_beta_steps.
num_train_steps: 100001
num_train_iters: 1
# NOTE(review): the next three keys look like an epsilon-greedy schedule
# (exploration length, warm-up before updates, epsilon floor) - confirm against
# the training loop, which is not visible from this file.
num_exploration_steps: 5000
start_training_steps: 1600
min_eps: 0.1
# Always equal to num_train_steps, so changing the step budget also resizes
# the replay buffer.
replay_buffer_capacity: ${num_train_steps}
# NOTE(review): prioritized_replay_alpha presumably only matters when
# prioritized_replay is true - confirm.
prioritized_replay: false
prioritized_replay_alpha: 0.6
seed: 1
# Evaluation.
# NOTE(review): eval_frequency is exactly num_train_steps - 1 (100001 - 1), so
# an evaluation presumably lands on the final training step - confirm against
# the training loop before changing either value independently.
eval_frequency: 100000
num_eval_steps: 125000
# Logging, video capture and compute device.
log_frequency_step: 10000
# NOTE(review): "tb" presumably stands for TensorBoard - confirm in the logger.
log_save_tb: true
save_video: true
save_train_video: false
# Forwarded to the agent through agent.params.device: ${device}.
device: cuda
# Observation / image-augmentation settings.
# All three keys are forwarded to critic.params through interpolation
# (aug_type, image_pad, intensity_scale).
image_pad: 4
intensity_scale: 0.1
# Note: `none` is loaded as the plain string "none"; it is not YAML null
# (null is spelled `null` or `~`).
aug_type: none
# Global hyper-parameters shared through interpolation.
# lr, beta_1, beta_2, weight_decay, adam_eps, max_grad_norm and batch_size are
# forwarded to agent.params; hidden_depth is forwarded to critic.params.
# Keep the numbers in plain decimal form: exponent notation (e.g. 1e-4) is not
# parsed as a float by every YAML loader.
lr: 0.0001
beta_1: 0.9
beta_2: 0.999
weight_decay: 0.0
adam_eps: 0.00015
max_grad_norm: 10.0
hidden_depth: 1
batch_size: 32

# Agent configuration.
# `class` is a dotted path and `params` holds the values meant for it.
# NOTE(review): presumably the training script instantiates `class` with
# `params` as keyword arguments - confirm, since that code is not visible here.
agent:
  name: drql
  class: drql.DRQLAgent
  params:
    # `???` is OmegaConf's marker for a mandatory value: it is not set in this
    # file and must be supplied before it is read. critic.params.num_actions
    # and encoder.params.obs_shape both interpolate from these two entries.
    obs_shape: ??? # mandatory, to be specified later
    num_actions: ??? # mandatory, to be specified later
    device: ${device}
    # Whole sub-configs pulled in from the top-level `encoder` and `critic`
    # mappings defined further down in this file.
    encoder_cfg: ${encoder}
    critic_cfg: ${critic}
    discount: 0.99
    # Values taken from the top-level global hyper-parameters.
    lr: ${lr}
    beta_1: ${beta_1}
    beta_2: ${beta_2}
    weight_decay: ${weight_decay}
    adam_eps: ${adam_eps}
    max_grad_norm: ${max_grad_norm}
    # NOTE(review): tau of 1.0 with an update frequency of 1 presumably means
    # the target critic is overwritten on every update - confirm in the agent.
    critic_tau: 1.0
    critic_target_update_frequency: 1
    batch_size: ${batch_size}
    multistep_return: 10
    eval_eps: 0.05
    double_q: true
    prioritized_replay_beta0: 0.4
    # Tied to the total training step budget (top-level num_train_steps).
    prioritized_replay_beta_steps: ${num_train_steps}

# Critic configuration, pulled into the agent via agent.params.critic_cfg.
critic:
  class: drql.Critic
  params:
    # Resolves through agent.params.encoder_cfg, which itself points at the
    # top-level `encoder` mapping.
    encoder_cfg: ${agent.params.encoder_cfg}
    # Mirrors the agent's num_actions, a mandatory value (???) not set in this
    # file.
    num_actions: ${agent.params.num_actions}
    hidden_dim: 512
    hidden_depth: ${hidden_depth}
    dueling: true
    # Augmentation settings taken from the top-level observation keys.
    aug_type: ${aug_type}
    image_pad: ${image_pad}
    intensity_scale: ${intensity_scale}

# Encoder configuration, pulled into the agent via agent.params.encoder_cfg
# (and from there into critic.params.encoder_cfg).
encoder:
  class: drql.Encoder
  params:
    # Mirrors the agent's observation shape, a mandatory value (???) that is
    # not set in this file.
    obs_shape: ${agent.params.obs_shape}

# Top-level experiment label; it is not part of the encoder mapping above.
# NOTE(review): not referenced elsewhere in this file - presumably read by the
# training script or used in command-line overrides; confirm.
experiment: bench


# Hydra configuration.
hydra:
  # Takes the value of the top-level `env` key.
  # NOTE(review): presumably serves as the job name - confirm for the Hydra
  # version in use.
  name: ${env}
  run:
    # Output directory for a run: ./runs/<date>/<time>_<overrides>.
    # `now` is Hydra's timestamp resolver (strftime-style pattern), and
    # hydra.job.override_dirname is derived by Hydra from the command-line
    # overrides, so runs with different overrides land in different folders.
    dir: ./runs/${now:%Y.%m.%d}/${now:%H%M%S}_${hydra.job.override_dirname}
