# Base settings for the `atari` package: which games this file covers and
# which policy / recurrent model names are used with them.
[base]
package = atari

# Space-separated list of game identifiers, all on one line.
# NOTE(review): presumably the loader splits this value on whitespace and
# matches the requested environment against each entry — confirm against the
# consumer before adding names that contain spaces.
env_name = adventure air_raid alien amidar assault asterix asteroids atlantis2 atlantis backgammon bank_heist basic_math battle_zone berzerk blackjack bowling boxing carnival casino centipede chopper_command combat crazy_climber crossbow darkchambers defender demon_attack donkey_kong double_dunk earthworld elevator_action entombed et fishing_derby flag_capture freeway frogger frostbite galaxian gopher gravitar hangman haunted_house hero human_cannonball ice_hockey jamesbond journey_escape joust kaboom kangaroo keystone_kapers king_kong klax koolaid krull kung_fu_master laser_gates lost_luggage mario_bros maze_craze miniature_golf montezuma_revenge mr_do ms_pacman name_this_game othello pacman phoenix pitfall2 pitfall pooyan private_eye riverraid road_runner robotank sir_lancelot skiing solaris space_war star_gunner superman surround tennis tetris tic_tac_toe_3d time_pilot trondead turmoil tutankham up_n_down venture video_checkers video_chess video_cube video_pinball warlords wizard_of_wor word_zapper yars_revenge zaxxon

# NOTE(review): presumably the names of the policy class and its recurrent
# wrapper, looked up by name in the package's model code — confirm.
policy_name = Policy
rnn_name = Recurrent

# Training hyperparameters. total_timesteps, batch_size and minibatch_size
# also have sweep ranges in the [sweep.parameters.train.parameters.*]
# sections of this file; the values here are the non-sweep settings.
[train]
# 144 envs over 24 workers is 6 per worker, and env_batch_size (48) is a
# multiple of 6.
# NOTE(review): presumably the vectorization backend requires these three
# values to divide evenly — confirm before changing any one of them alone.
num_envs = 144
num_workers = 24
env_batch_size = 48
zero_copy = False
# 32768 / 1024 = 32 minibatches per batch at these settings.
batch_size = 32768
minibatch_size = 1024
update_epochs = 2
bptt_horizon = 8
# NOTE(review): the underscore digit separators rely on the loader parsing
# values as Python-style literals — confirm; a plain string->int conversion
# in another parser would not accept this form.
total_timesteps = 10_000_000
anneal_lr = False

# Environment settings. frameskip also has a sweep range under
# [sweep.parameters.env.parameters.frameskip]; the sweep for
# repeat_action_probability is currently commented out in this file.
[env]
# NOTE(review): presumably the number of emulator frames each action is held
# for, and the "sticky action" probability (0.0 = off) — confirm against the
# environment constructor that receives these keys.
frameskip = 4
repeat_action_probability = 0.0

# Sweep range for env.frameskip: sampled between 1 and 10.
# NOTE(review): [env] sets frameskip to an integer (4), but `uniform` is
# typically a continuous distribution — confirm that the sweep backend rounds
# the sample or switch to its integer distribution if it has one.
[sweep.parameters.env.parameters.frameskip]
distribution = uniform
min = 1
max = 10

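# Disabled: sweep range for env.repeat_action_probability.
# Uncomment all four lines below to include it in the sweep.
#[sweep.parameters.env.parameters.repeat_action_probability]
#distribution = uniform
#min = 0
#max = 1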
 
# Sweep range for train.total_timesteps: 5M to 200M. The [train] value
# (10_000_000) lies inside this range.
# NOTE(review): a step count is an integer, but `uniform` is typically
# continuous — confirm the sweep backend rounds the sampled value.
[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 5_000_000
max = 200_000_000

# Sweep range for train.batch_size: 16384 to 65536. The [train] value (32768)
# lies inside this range.
# NOTE(review): sampled with `uniform`, so values are not restricted to
# powers of two or to integers — confirm the sweep backend or trainer rounds
# the sample to a usable batch size.
[sweep.parameters.train.parameters.batch_size]
distribution = uniform
min = 16384
max = 65536

# Sweep range for train.minibatch_size: 512 to 8192. The maximum (8192) is
# below the batch_size sweep minimum (16384), so a sampled minibatch is never
# larger than the sampled batch.
# NOTE(review): batch_size and minibatch_size are sampled independently, so
# the batch may not be an exact multiple of the minibatch — confirm the
# trainer tolerates or corrects that.
[sweep.parameters.train.parameters.minibatch_size]
distribution = uniform
min = 512
max = 8192
