; Selects which package, environment, policy, and recurrent wrapper this
; configuration applies to.
; NOTE(review): how these names are resolved (module lookup, class lookup)
; is defined by the loader, which is not visible here - confirm.
[base]
package = ocean
env_name = puffer_breakout
policy_name = Policy
rnn_name = Recurrent

; Environment settings.
; This num_envs (1024) is a separate key from num_envs under [train] (1);
; the two live in different sections and are not duplicates.
; NOTE(review): presumably passed to the environment constructor - confirm
; against the loader.
[env]
num_envs = 1024

; Training settings.
; NOTE(review): the high-precision float values below look like the output
; of an automated hyperparameter search - confirm before hand-editing them.
[train]
total_timesteps = 300000000
checkpoint_interval = 50
; Distinct from num_envs under [env] (1024); see that section.
num_envs = 1
num_workers = 1
env_batch_size = 1
; batch_size is 64 x minibatch_size (262144 = 64 * 4096).
batch_size = 262144
update_epochs = 4
learning_rate = 0.0005978428084749276
minibatch_size = 4096
bptt_horizon = 16
; Capitalised spelling kept as-is: a loader that evaluates values as Python
; literals would not treat lowercase "false" as a boolean - confirm.
anneal_lr = False
device = cuda
gamma = 0.9257755108746066
gae_lambda = 0.8783667470139129
ent_coef = 0.0027080029654114927
max_grad_norm = 0.3808319568634033
vf_coef = 0.17343129599886223
