; Selects what to run: a package name plus an environment name.
; NOTE(review): presumably resolved by the training launcher to locate the
; environment implementation - confirm against the code that reads this file.
[base]
package = atari
env_name = qbert

; Training hyperparameters.
; NOTE(review): the key names (gae_lambda, clip_coef, vf_coef, ent_coef) look
; like PPO-style settings, and the high-precision float values look
; machine-generated (e.g. by a hyperparameter search) - confirm before
; hand-editing or rounding them.
[train]
; Integers below use "_" as a digit separator (15_000_000 = 15 million,
; 32_768 = 32768). This assumes the consumer parses values as Python-style
; numeric literals - TODO confirm; a plain string-to-int parser in another
; language may reject them.
total_timesteps = 15_000_000
; batch_size is an exact multiple of minibatch_size (32768 = 32 x 1024).
; Presumably that divisibility is expected by the trainer - keep it when
; changing either value.
batch_size = 32_768
minibatch_size = 1024
update_epochs = 3
; NOTE(review): presumably the truncated-backpropagation-through-time segment
; length used for recurrent policies - verify against the trainer.
bptt_horizon = 8
learning_rate = 0.00104284086325656
gae_lambda = 0.8573007456819492
gamma = 0.9426362777287904
ent_coef = 0.025180053429464784
clip_coef = 0.23123278532103236
vf_clip_coef = 0.12751979973690886
vf_coef = 0.5903166418793799
max_grad_norm = 0.1610541045665741

; Environment settings.
; NOTE(review): presumably forwarded as keyword arguments to the Atari
; environment constructor - confirm against the code that reads this section.
[env]
; Presumably the number of emulator frames each chosen action is held for.
frameskip = 4
; Presumably the "sticky action" probability; 0.0 would mean the previous
; action is never repeated in place of the chosen one - verify.
repeat_action_probability = 0.0
