# Settings stored under the `atari` key.
# NOTE(review): presumably shared by every Atari environment rather than
# naming a single env id - confirm against the code that loads this file.
atari:
  # Policy and run length.
  # `!!float` is explicit because a dot-less exponent such as `1e7` is not
  # read as a float by every YAML loader.
  policy: 'CnnPolicy'
  n_timesteps: !!float 1e7

  # Learning rate and replay buffer size.
  learning_rate: !!float 1e-4
  buffer_size: 10000

  # Training cadence.
  learning_starts: 10000
  train_freq: 4
  target_network_update_freq: 1000

  # Exploration settings.
  exploration_fraction: 0.1
  exploration_final_eps: 0.01

  # Prioritized replay.
  prioritized_replay: true
  prioritized_replay_alpha: 0.6

# Settings stored under the `CartPole-v1` key.
CartPole-v1:
  # Policy and run length.
  # `!!float` is explicit because a dot-less exponent such as `1e5` is not
  # read as a float by every YAML loader.
  policy: 'CustomDQNPolicy'
  n_timesteps: !!float 1e5

  # Learning rate and replay buffer size.
  learning_rate: !!float 1e-3
  buffer_size: 50000

  # Exploration settings.
  exploration_fraction: 0.1
  exploration_final_eps: 0.02

  # Prioritized replay.
  prioritized_replay: true

# Settings stored under the `MountainCar-v0` key.
MountainCar-v0:
  # Policy and run length.
  # NOTE(review): `n_timesteps` is a plain integer here, whereas the other
  # entries in this file tag theirs with `!!float` - confirm the consumer
  # accepts both before unifying.
  policy: 'CustomDQNPolicy'
  n_timesteps: 100000

  # Learning rate and replay buffer size.
  # `!!float` is explicit because a dot-less exponent such as `1e-3` is not
  # read as a float by every YAML loader.
  learning_rate: !!float 1e-3
  buffer_size: 50000

  # Exploration settings.
  exploration_fraction: 0.1
  exploration_final_eps: 0.1

  # This entry enables `param_noise`; it sets no `prioritized_replay` key.
  param_noise: true

# Settings stored under the `LunarLander-v2` key.
LunarLander-v2:
  # Policy and run length.
  # `!!float` is explicit because a dot-less exponent such as `2e5` is not
  # read as a float by every YAML loader.
  policy: 'CustomDQNPolicy'
  n_timesteps: !!float 2e5

  # Learning rate and replay buffer size.
  learning_rate: !!float 1e-3
  buffer_size: 100000

  # Exploration settings.
  exploration_fraction: 0.1
  exploration_final_eps: 0.05

  # Prioritized replay.
  prioritized_replay: true

# Settings stored under the `Acrobot-v1` key.
Acrobot-v1:
  # Policy and run length.
  # `!!float` is explicit because a dot-less exponent such as `1e5` is not
  # read as a float by every YAML loader.
  policy: 'CustomDQNPolicy'
  n_timesteps: !!float 1e5

  # Learning rate and replay buffer size.
  learning_rate: !!float 1e-3
  buffer_size: 50000

  # Exploration settings.
  exploration_fraction: 0.1
  exploration_final_eps: 0.02

  # Prioritized replay.
  prioritized_replay: true
