; Top-level selection keys. Presumably these name the environment package,
; the environment, and the policy / recurrent wrapper classes to load --
; confirm against the loader that reads this file.
[base]
package = ocean
env_name = puffer_enduro
policy_name = Policy
rnn_name = Recurrent

; Environment settings (presumably forwarded to the environment itself --
; confirm against the loader).
[env]
; NOTE(review): a key with the same name exists under [train] with value 1.
; The two live in different sections and are presumably read by different
; components -- verify which consumer uses which.
num_envs = 4096

; Training-run settings. Key meanings below are inferred from their names;
; confirm against the trainer that consumes this section.
[train]
; NOTE(review): underscore digit grouping is not understood by every INI
; consumer; presumably values are parsed as Python literals -- confirm.
total_timesteps = 500_000_000
; Units (updates, epochs, steps) are not stated here -- TODO confirm.
checkpoint_interval = 200
; NOTE(review): [env] also defines num_envs (4096). This one is a separate
; key in a separate section -- verify which component reads each.
num_envs = 1
num_workers = 1
env_batch_size = 1
; batch_size / minibatch_size = 131072 / 16384 = 8.
batch_size = 131072
update_epochs = 1
minibatch_size = 16384
bptt_horizon = 16
; Policy and value clipping use the same coefficient (0.2).
clip_coef = 0.2
vf_clip_coef = 0.2
vf_coef = 0.5
ent_coef = 0.005
gae_lambda = 0.95
gamma = 0.97
learning_rate = 0.001
max_grad_norm = 0.5
; Capitalised boolean is kept as written; presumably required by the value
; parser (Python-literal style) -- do not normalise without checking.
anneal_lr = False
device = cuda
