; Run configuration for the puffer_pysquared environment.
; NOTE(review): the key names suggest this file is read by a PufferLib-style
; trainer through Python configparser -- confirm against the actual loader.
[base]
; Package the environment belongs to (presumably used to locate env_name).
package = ocean
; Identifier of the environment to run.
env_name = puffer_pysquared
; Name of the policy to use; presumably a class looked up by the loader.
policy_name = Policy
; Name of the recurrent component paired with the policy -- TODO confirm
; whether it wraps policy_name or replaces it.
rnn_name = Recurrent

; Environment settings (presumably forwarded to the environment itself).
[env]
; NOTE(review): [train] defines its own num_envs with a different value.
; The two keys live in separate sections, so they do not collide, but confirm
; which consumer reads which one before changing either.
num_envs = 1

; Training settings.
; NOTE(review): the per-key descriptions below are inferred from the key names;
; verify them against the trainer that consumes this section.
[train]
; Total step budget for the run. The underscores are Python-style digit
; grouping; keep this form only if the loader evaluates values as Python
; literals -- TODO confirm.
total_timesteps = 40_000_000
; Checkpoint cadence; the unit (epochs, updates, ...) is not visible here.
checkpoint_interval = 50
; 12288 / num_workers (12) = 1024 per worker, assuming an even split.
num_envs = 12288
num_workers = 12
; 4096 is exactly one third of num_envs (12288).
env_batch_size = 4096
; 131072 = 16 * minibatch_size (8192).
batch_size = 131072
update_epochs = 1
minibatch_size = 8192
learning_rate = 0.0017
; Capitalised, Python-style boolean. Do not lowercase it without checking how
; the loader parses booleans.
anneal_lr = False
; Compute device name; "cuda" presumably requires a CUDA-capable GPU.
device = cuda
